[turboshaft] Port LateEscapeAnalysis
Bug: v8:12783
Change-Id: Id5fa026d103dc67e05322b725f34186124bc5936
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4054621
Commit-Queue: Darius Mercadier <dmercadier@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#84603}
parent: c618a17984
commit: fbcffa62b8
BUILD.bazel

@@ -2898,6 +2898,8 @@ filegroup(
         "src/compiler/turboshaft/index.h",
         "src/compiler/turboshaft/graph-visualizer.cc",
         "src/compiler/turboshaft/graph-visualizer.h",
+        "src/compiler/turboshaft/late-escape-analysis-reducer.h",
+        "src/compiler/turboshaft/late-escape-analysis-reducer.cc",
         "src/compiler/turboshaft/layered-hash-map.h",
         "src/compiler/turboshaft/machine-optimization-reducer.h",
         "src/compiler/turboshaft/memory-optimization.cc",
BUILD.gn (2 additions)
@@ -2955,6 +2955,7 @@ v8_header_set("v8_internal_headers") {
     "src/compiler/turboshaft/graph-visualizer.h",
     "src/compiler/turboshaft/graph.h",
     "src/compiler/turboshaft/index.h",
+    "src/compiler/turboshaft/late-escape-analysis-reducer.h",
     "src/compiler/turboshaft/layered-hash-map.h",
     "src/compiler/turboshaft/machine-optimization-reducer.h",
     "src/compiler/turboshaft/memory-optimization.h",
@@ -4291,6 +4292,7 @@ v8_source_set("v8_turboshaft") {
     "src/compiler/turboshaft/graph-builder.cc",
     "src/compiler/turboshaft/graph-visualizer.cc",
     "src/compiler/turboshaft/graph.cc",
+    "src/compiler/turboshaft/late-escape-analysis-reducer.cc",
    "src/compiler/turboshaft/memory-optimization.cc",
    "src/compiler/turboshaft/operations.cc",
    "src/compiler/turboshaft/optimization-phase.cc",
src/compiler/pipeline.cc

@@ -85,6 +85,7 @@
 #include "src/compiler/turboshaft/graph-builder.h"
 #include "src/compiler/turboshaft/graph-visualizer.h"
 #include "src/compiler/turboshaft/graph.h"
+#include "src/compiler/turboshaft/late-escape-analysis-reducer.h"
 #include "src/compiler/turboshaft/machine-optimization-reducer.h"
 #include "src/compiler/turboshaft/memory-optimization.h"
 #include "src/compiler/turboshaft/optimization-phase.h"
@@ -1964,8 +1965,7 @@ struct LateOptimizationPhase {
   void Run(PipelineData* data, Zone* temp_zone) {
     if (data->HasTurboshaftGraph()) {
-      // TODO(dmercadier,tebbi): port missing reducers (LateEscapeAnalysis and
-      // CommonOperatorReducer) to turboshaft.
+      // TODO(dmercadier,tebbi): add missing CommonOperatorReducer.
       turboshaft::OptimizationPhase<
           turboshaft::VariableReducer, turboshaft::BranchEliminationReducer,
           turboshaft::SelectLoweringReducer,
@@ -1994,8 +1994,8 @@ struct LateOptimizationPhase {
     JSGraphAssembler graph_assembler(data->jsgraph(), temp_zone,
                                      BranchSemantics::kMachine);
     SelectLowering select_lowering(&graph_assembler, data->graph());
-    AddReducer(data, &graph_reducer, &escape_analysis);
+    if (!v8_flags.turboshaft) {
+      AddReducer(data, &graph_reducer, &escape_analysis);
+      AddReducer(data, &graph_reducer, &branch_condition_elimination);
+    }
     AddReducer(data, &graph_reducer, &dead_code_elimination);
@@ -2094,6 +2094,7 @@ struct OptimizeTurboshaftPhase {
     UnparkedScopeIfNeeded scope(data->broker(),
                                 v8_flags.turboshaft_trace_reduction);
     turboshaft::OptimizationPhase<
+        turboshaft::LateEscapeAnalysisReducer,
         turboshaft::MemoryOptimizationReducer, turboshaft::VariableReducer,
         turboshaft::MachineOptimizationReducerSignallingNanImpossible,
         turboshaft::ValueNumberingReducer>::
src/compiler/turboshaft/assembler.h

@@ -53,6 +53,7 @@ class ReducerStack<Assembler, FirstReducer, Reducers...>
 template <class Assembler>
 class ReducerStack<Assembler> {
  public:
+  using AssemblerType = Assembler;
   Assembler& Asm() { return *static_cast<Assembler*>(this); }
 };

@@ -89,6 +90,7 @@ class ReducerBaseForwarder : public Next {
 // (Goto, Branch, Switch, CallAndCatchException), and takes care of updating
 // Block predecessors (and calls the Assembler to maintain split-edge form).
 // ReducerBase is always added by Assembler at the bottom of the reducer stack.
+// It also provides a default ShouldSkipOperation method that returns false.
 template <class Next>
 class ReducerBase : public ReducerBaseForwarder<Next> {
  public:

@@ -193,6 +195,15 @@ class ReducerBase : public ReducerBaseForwarder<Next> {
     Asm().AddPredecessor(saved_current_block, default_case, true);
     return new_opindex;
   }
+
+  template <class Op>
+  bool ShouldSkipOperation(const Op& op, OpIndex) {
+    if (op.saturated_use_count == 0 &&
+        !op.Properties().is_required_when_unused) {
+      return true;
+    }
+    return false;
+  }
 };

 template <class Assembler>
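For context on how this hooks in: Turboshaft reducers are template mixins, each parameterized over the `Next` reducer below it, so a reducer can refine `ShouldSkipOperation` and still defer down the stack to the `ReducerBase` default. The following is a minimal standalone sketch of that composition pattern; the types and the `op == 42` analysis result are made up for illustration, not V8 code:

#include <iostream>

// Minimal model of the mixin chain (not the real V8 types): each reducer
// inherits from the next one and may refine ShouldSkip, delegating upward.
struct Base {
  // Default policy at the bottom of the stack: never skip.
  bool ShouldSkip(int op) { return false; }
};

template <class Next>
struct EscapeAnalysis : Next {
  using Next::ShouldSkip;  // keep Next's overloads visible
  bool ShouldSkip(int op) {
    // Hypothetical analysis result: operation #42 was found removable.
    return op == 42 || Next::ShouldSkip(op);
  }
};

template <class Next>
struct Tracing : Next {
  bool ShouldSkip(int op) {
    bool skip = Next::ShouldSkip(op);
    std::cout << "op " << op << (skip ? " skipped\n" : " kept\n");
    return skip;
  }
};

int main() {
  Tracing<EscapeAnalysis<Base>> stack;
  stack.ShouldSkip(42);  // prints "op 42 skipped"
  stack.ShouldSkip(7);   // prints "op 7 kept"
}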
src/compiler/turboshaft/late-escape-analysis-reducer.cc (new file, 100 lines)
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/turboshaft/late-escape-analysis-reducer.h"

namespace v8::internal::compiler::turboshaft {

void LateEscapeAnalysisAnalyzer::Run() {
  CollectUsesAndAllocations();
  FindRemovableAllocations();
}

void LateEscapeAnalysisAnalyzer::RecordAllocateUse(OpIndex alloc, OpIndex use) {
  auto [it, new_entry] = alloc_uses_.try_emplace(alloc, phase_zone_);
  auto& uses = it->second;
  if (new_entry) {
    uses.reserve(graph_.Get(alloc).saturated_use_count);
  }
  uses.push_back(use);
}

// Collects the Allocate operations and their uses.
void LateEscapeAnalysisAnalyzer::CollectUsesAndAllocations() {
  for (auto& op : graph_.AllOperations()) {
    OpIndex op_index = graph_.Index(op);
    for (OpIndex input : op.inputs()) {
      if (graph_.Get(input).Is<AllocateOp>()) {
        RecordAllocateUse(input, op_index);
      }
    }
    if (op.Is<AllocateOp>()) {
      allocs_.push_back(op_index);
    }
  }
}

void LateEscapeAnalysisAnalyzer::FindRemovableAllocations() {
  while (!allocs_.empty()) {
    OpIndex current_alloc = allocs_.back();
    allocs_.pop_back();

    if (ShouldSkipOperation(current_alloc)) {
      // We are re-visiting an allocation that we've actually already removed.
      continue;
    }

    if (!AllocationIsEscaping(current_alloc)) {
      MarkToRemove(current_alloc);
    }
  }
}

bool LateEscapeAnalysisAnalyzer::AllocationIsEscaping(OpIndex alloc) {
  if (alloc_uses_.find(alloc) == alloc_uses_.end()) return false;
  for (OpIndex use : alloc_uses_.at(alloc)) {
    if (EscapesThroughUse(alloc, use)) return true;
  }
  // We haven't found any non-store use.
  return false;
}

// Returns true if {using_op_idx} is an operation that forces {alloc} to be
// emitted.
bool LateEscapeAnalysisAnalyzer::EscapesThroughUse(OpIndex alloc,
                                                   OpIndex using_op_idx) {
  if (ShouldSkipOperation(alloc)) {
    // {using_op_idx} is an Allocate itself, which has been removed.
    return false;
  }
  const Operation& op = graph_.Get(using_op_idx);
  if (const StoreOp* store_op = op.TryCast<StoreOp>()) {
    // A StoreOp only makes {alloc} escape if it uses {alloc} as the {value} or
    // the {index}. Put otherwise, a StoreOp makes {alloc} escape if it writes
    // {alloc}, but not if it writes **to** {alloc}.
    return store_op->value() == alloc;
  }
  return true;
}

void LateEscapeAnalysisAnalyzer::MarkToRemove(OpIndex alloc) {
  operations_to_skip_.insert(alloc);
  if (alloc_uses_.find(alloc) == alloc_uses_.end()) {
    return;
  }

  // The uses of {alloc} should also be skipped.
  for (OpIndex use : alloc_uses_.at(alloc)) {
    operations_to_skip_.insert(use);
    const StoreOp& store = graph_.Get(use).Cast<StoreOp>();
    if (graph_.Get(store.value()).Is<AllocateOp>()) {
      // This store was storing the result of an allocation. Because we now
      // removed this store, we might be able to remove the other allocation
      // as well.
      allocs_.push_back(store.value());
    }
  }
}

}  // namespace v8::internal::compiler::turboshaft
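To see why MarkToRemove pushes store.value() back onto allocs_: removing a non-escaping allocation also removes its initializing stores, and that can make the allocation whose value was being stored dead in turn. A minimal standalone model of that cascading worklist (toy ids and containers, not the V8 data structures):

#include <iostream>
#include <set>
#include <vector>

// Toy model of FindRemovableAllocations/MarkToRemove: integer operation ids,
// and stores recorded as (base, value) pairs. All shapes here are
// hypothetical simplifications.
struct Store { int id, base, value; };

int main() {
  // Graph: alloc 0 and alloc 1; store 2 writes alloc 1 into alloc 0.
  // Nothing else uses alloc 0, so it is removable; once store 2 goes away,
  // alloc 1 has no live uses either and is removed on a second visit.
  std::vector<int> allocs = {0, 1};
  std::vector<Store> stores = {{2, /*base=*/0, /*value=*/1}};
  std::set<int> skipped;

  std::vector<int> worklist = allocs;
  while (!worklist.empty()) {
    int alloc = worklist.back();
    worklist.pop_back();
    if (skipped.count(alloc)) continue;  // already removed on a prior visit

    // Escapes if some live (non-skipped) store uses it as the *value*.
    bool escapes = false;
    for (const Store& s : stores) {
      if (skipped.count(s.id)) continue;
      if (s.value == alloc) escapes = true;
    }
    if (escapes) continue;

    skipped.insert(alloc);
    // Remove the initializing stores; their stored values may become dead.
    for (const Store& s : stores) {
      if (s.base == alloc && !skipped.count(s.id)) {
        skipped.insert(s.id);
        worklist.push_back(s.value);  // re-examine the stored allocation
      }
    }
  }
  std::cout << "removed ops:";
  for (int op : skipped) std::cout << ' ' << op;  // prints: removed ops: 0 1 2
  std::cout << '\n';
}

Here alloc 1 escapes on its first visit (live store 2 uses it as a value), but once alloc 0 and store 2 are removed, revisiting alloc 1 finds no live use, so all three operations end up eliminated.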
src/compiler/turboshaft/late-escape-analysis-reducer.h (new file, 90 lines)
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_COMPILER_TURBOSHAFT_LATE_ESCAPE_ANALYSIS_REDUCER_H_
#define V8_COMPILER_TURBOSHAFT_LATE_ESCAPE_ANALYSIS_REDUCER_H_

#include "src/compiler/turboshaft/assembler.h"
#include "src/compiler/turboshaft/graph.h"
#include "src/compiler/turboshaft/utils.h"
#include "src/zone/zone-containers.h"
#include "src/zone/zone.h"

namespace v8::internal::compiler::turboshaft {

// LateEscapeAnalysis removes allocations that have no uses besides the stores
// initializing the object.

class LateEscapeAnalysisAnalyzer {
 public:
  LateEscapeAnalysisAnalyzer(const Graph& graph, Zone* zone)
      : graph_(graph),
        phase_zone_(zone),
        alloc_uses_(zone),
        allocs_(zone),
        operations_to_skip_(zone) {}

  void Run();

  bool ShouldSkipOperation(OpIndex index) {
    return operations_to_skip_.count(index) > 0;
  }

 private:
  void RecordAllocateUse(OpIndex alloc, OpIndex use);

  void CollectUsesAndAllocations();
  void FindRemovableAllocations();
  bool AllocationIsEscaping(OpIndex alloc);
  bool EscapesThroughUse(OpIndex alloc, OpIndex using_op_idx);
  void MarkToRemove(OpIndex alloc);

  const Graph& graph_;
  Zone* phase_zone_;

  // {alloc_uses_} records all the uses of each AllocateOp.
  ZoneUnorderedMap<OpIndex, ZoneVector<OpIndex>> alloc_uses_;
  // {allocs_} is filled with all of the AllocateOp of the graph, and then
  // iterated upon to determine which allocations can be removed and which
  // cannot.
  ZoneVector<OpIndex> allocs_;
  // {operations_to_skip_} contains all of the AllocateOp and StoreOp that can
  // be removed.
  ZoneUnorderedSet<OpIndex> operations_to_skip_;
};

template <class Next>
class LateEscapeAnalysisReducer : public Next {
 public:
  using Next::Asm;
  // We need the next line to not shadow Next's (and ReducerBase's in
  // particular) ShouldSkipOperation method.
  using Next::ShouldSkipOperation;

  template <class... Args>
  explicit LateEscapeAnalysisReducer(const std::tuple<Args...>& args)
      : Next(args), analyzer_(Asm().input_graph(), Asm().phase_zone()) {}

  void Analyze() {
    analyzer_.Run();
    Next::Analyze();
  }

  bool ShouldSkipOperation(const StoreOp& op, OpIndex old_idx) {
    return analyzer_.ShouldSkipOperation(old_idx) ||
           Next::ShouldSkipOperation(op, old_idx);
  }

  bool ShouldSkipOperation(const AllocateOp& op, OpIndex old_idx) {
    return analyzer_.ShouldSkipOperation(old_idx) ||
           Next::ShouldSkipOperation(op, old_idx);
  }

 private:
  LateEscapeAnalysisAnalyzer analyzer_;
};

}  // namespace v8::internal::compiler::turboshaft

#endif  // V8_COMPILER_TURBOSHAFT_LATE_ESCAPE_ANALYSIS_REDUCER_H_
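The pair of exact ShouldSkipOperation overloads above, together with `using Next::ShouldSkipOperation;`, means only StoreOp and AllocateOp consult the analyzer; every other operation kind falls through to the generic template further down the stack. A minimal sketch of that overload dispatch (toy types and a hypothetical analysis result, not V8 code):

#include <iostream>

struct StoreOp {};
struct AddOp {};

// Simplified model of the dispatch in LateEscapeAnalysisReducer: the exact
// non-template overload wins over the generic template pulled in from the
// base, so only the intercepted op kinds reach the analyzer.
struct Base {
  template <class Op>
  bool ShouldSkip(const Op&) {
    std::cout << "generic check\n";
    return false;
  }
};

struct EscapeReducer : Base {
  using Base::ShouldSkip;  // keep the generic template visible
  bool ShouldSkip(const StoreOp&) {
    std::cout << "analyzer check for StoreOp\n";
    return true;  // hypothetical: the analyzer marked this store removable
  }
};

int main() {
  EscapeReducer r;
  r.ShouldSkip(StoreOp{});  // exact overload: "analyzer check for StoreOp"
  r.ShouldSkip(AddOp{});    // falls through:  "generic check"
}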
src/compiler/turboshaft/memory-optimization.cc

@@ -7,7 +7,7 @@
 #include "src/codegen/interface-descriptors-inl.h"
 #include "src/compiler/linkage.h"

-namespace v8 ::internal::compiler::turboshaft {
+namespace v8::internal::compiler::turboshaft {

 const TSCallDescriptor* CreateAllocateBuiltinDescriptor(Zone* zone) {
   return TSCallDescriptor::Create(
@@ -19,146 +19,4 @@ const TSCallDescriptor* CreateAllocateBuiltinDescriptor(Zone* zone) {
       zone);
 }

-void MemoryAnalyzer::Run() {
-  block_states[current_block] = BlockState{};
-  BlockIndex end = BlockIndex(input_graph.block_count());
-  while (current_block < end) {
-    state = *block_states[current_block];
-    auto operations_range =
-        input_graph.operations(input_graph.Get(current_block));
-    // Set the next block index here already, to allow it to be changed if
-    // needed.
-    current_block = BlockIndex(current_block.id() + 1);
-    for (const Operation& op : operations_range) {
-      Process(op);
-    }
-  }
-}
-
-void MemoryAnalyzer::Process(const Operation& op) {
-  if (auto* alloc = op.TryCast<AllocateOp>()) {
-    ProcessAllocation(*alloc);
-    return;
-  }
-  if (auto* store = op.TryCast<StoreOp>()) {
-    ProcessStore(input_graph.Index(op), store->base());
-    return;
-  }
-  OpProperties properties = op.Properties();
-  if (properties.can_allocate) {
-    state = BlockState();
-  }
-  if (properties.is_block_terminator) {
-    ProcessBlockTerminator(op);
-  }
-}
-
-// Update the successor block states based on the state of the current block.
-// For loop backedges, we need to re-start the analysis from the loop header
-// unless the backedge state is unchanged.
-void MemoryAnalyzer::ProcessBlockTerminator(const Operation& op) {
-  if (auto* goto_op = op.TryCast<GotoOp>()) {
-    if (input_graph.IsLoopBackedge(*goto_op)) {
-      base::Optional<BlockState>& target_state =
-          block_states[goto_op->destination->index()];
-      BlockState old_state = *target_state;
-      MergeCurrentStateIntoSuccessor(goto_op->destination);
-      if (old_state != *target_state) {
-        // We can never fold allocations inside of the loop into an
-        // allocation before the loop, since this leads to unbounded
-        // allocation size. An unknown `reserved_size` will prevent adding
-        // allocations inside of the loop.
-        target_state->reserved_size = base::nullopt;
-        // Redo the analysis from the beginning of the loop.
-        current_block = goto_op->destination->index();
-      }
-      return;
-    } else if (goto_op->destination->IsLoop()) {
-      // Look ahead to detect allocating loops earlier, avoiding a wrong
-      // speculation resulting in processing the loop twice.
-      for (const Operation& op :
-           input_graph.operations(*goto_op->destination)) {
-        if (op.Properties().can_allocate) {
-          state = BlockState();
-          break;
-        }
-      }
-    }
-  }
-  for (Block* successor : SuccessorBlocks(op)) {
-    MergeCurrentStateIntoSuccessor(successor);
-  }
-}
-
-// We try to merge the new allocation into a previous dominating allocation.
-// We also allow folding allocations across blocks, as long as there is a
-// dominating relationship.
-void MemoryAnalyzer::ProcessAllocation(const AllocateOp& alloc) {
-  if (ShouldSkipOptimizationStep()) return;
-  base::Optional<uint64_t> new_size;
-  if (auto* size = input_graph.Get(alloc.size()).TryCast<ConstantOp>()) {
-    new_size = size->integral();
-  }
-  // If the new allocation has a static size and is of the same type, then we
-  // can fold it into the previous allocation unless the folded allocation would
-  // exceed `kMaxRegularHeapObjectSize`.
-  if (state.last_allocation && new_size.has_value() &&
-      state.reserved_size.has_value() &&
-      alloc.type == state.last_allocation->type &&
-      *new_size <= kMaxRegularHeapObjectSize - *state.reserved_size) {
-    state.reserved_size =
-        static_cast<uint32_t>(*state.reserved_size + *new_size);
-    folded_into[&alloc] = state.last_allocation;
-    uint32_t& max_reserved_size = reserved_size[state.last_allocation];
-    max_reserved_size = std::max(max_reserved_size, *state.reserved_size);
-    return;
-  }
-  state.last_allocation = &alloc;
-  state.reserved_size = base::nullopt;
-  if (new_size.has_value() && *new_size <= kMaxRegularHeapObjectSize) {
-    state.reserved_size = static_cast<uint32_t>(*new_size);
-  }
-  // We might be re-visiting the current block. In this case, we need to remove
-  // an allocation that can no longer be folded.
-  reserved_size.erase(&alloc);
-  folded_into.erase(&alloc);
-}
-
-void MemoryAnalyzer::ProcessStore(OpIndex store, OpIndex object) {
-  if (SkipWriteBarrier(input_graph.Get(object))) {
-    skipped_write_barriers.insert(store);
-  } else {
-    // We might be re-visiting the current block. In this case, we need to
-    // still update the information.
-    skipped_write_barriers.erase(store);
-  }
-}
-
-void MemoryAnalyzer::MergeCurrentStateIntoSuccessor(const Block* successor) {
-  base::Optional<BlockState>& target_state = block_states[successor->index()];
-  if (!target_state.has_value()) {
-    target_state = state;
-    return;
-  }
-  // All predecessors need to have the same last allocation for us to continue
-  // folding into it. This is only true when all the predecessors don't do any
-  // allocations and have the same ancestor that does an allocation (and there
-  // is no allocation on the path from the predecessors to their allocating
-  // common ancestor).
-  if (target_state->last_allocation != state.last_allocation) {
-    target_state = BlockState();
-    return;
-  }
-  // We take the maximum allocation size of all predecessors. If the size is
-  // unknown because it is dynamic, we remember the allocation to eliminate
-  // write barriers.
-  if (target_state->reserved_size.has_value() &&
-      state.reserved_size.has_value()) {
-    target_state->reserved_size =
-        std::max(*target_state->reserved_size, *state.reserved_size);
-  } else {
-    target_state->reserved_size = base::nullopt;
-  }
-}
-
 }  // namespace v8::internal::compiler::turboshaft
src/compiler/turboshaft/memory-optimization.h

@@ -11,7 +11,7 @@
 #include "src/compiler/turboshaft/assembler.h"
 #include "src/compiler/turboshaft/utils.h"

-namespace v8 ::internal::compiler::turboshaft {
+namespace v8::internal::compiler::turboshaft {

 const TSCallDescriptor* CreateAllocateBuiltinDescriptor(Zone* zone);
@@ -27,11 +27,16 @@ const TSCallDescriptor* CreateAllocateBuiltinDescriptor(Zone* zone);
 // to satisfy all subsequent allocations.
 // We can do write barrier elimination across loops if the loop does not contain
 // any potentially allocating operations.
+template <class Assembler>
 struct MemoryAnalyzer {
   Zone* phase_zone;
   const Graph& input_graph;
-  MemoryAnalyzer(Zone* phase_zone, const Graph& input_graph)
-      : phase_zone(phase_zone), input_graph(input_graph) {}
+  Assembler* assembler;
+  MemoryAnalyzer(Zone* phase_zone, const Graph& input_graph,
+                 Assembler* assembler)
+      : phase_zone(phase_zone),
+        input_graph(input_graph),
+        assembler(assembler) {}

   struct BlockState {
     const AllocateOp* last_allocation = nullptr;
@@ -66,12 +71,13 @@ struct MemoryAnalyzer {
   }

   bool IsFoldedAllocation(OpIndex op) {
-    return folded_into.count(input_graph.Get(op).TryCast<AllocateOp>());
+    return folded_into.count(
+        input_graph.Get(op).template TryCast<AllocateOp>());
   }

   base::Optional<uint32_t> ReservedSize(OpIndex alloc) {
-    if (auto it =
-            reserved_size.find(input_graph.Get(alloc).TryCast<AllocateOp>());
+    if (auto it = reserved_size.find(
+            input_graph.Get(alloc).template TryCast<AllocateOp>());
         it != reserved_size.end()) {
       return it->second;
     }
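The `.template TryCast<...>` spellings are needed because MemoryAnalyzer is now itself a class template: when a member template is invoked through an expression of dependent type, the `template` keyword tells the parser that the following `<` opens a template argument list rather than a less-than comparison. A self-contained illustration (toy types, not V8 code):

#include <iostream>

struct Node {
  template <class T>
  T* TryCastTo() { return nullptr; }  // toy member template (always null here)
};

struct ToyGraph {
  Node node;
  Node& Get() { return node; }
};

// Inside a class template, graph.Get() has a dependent type, so the compiler
// cannot know TryCastTo names a member template; 'template' disambiguates.
template <class G>
struct ToyAnalyzer {
  G& graph;
  bool IsNull() { return graph.Get().template TryCastTo<int>() == nullptr; }
};

int main() {
  ToyGraph g;
  ToyAnalyzer<ToyGraph> a{g};
  std::cout << std::boolalpha << a.IsNull() << '\n';  // prints: true
}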
@@ -79,6 +85,7 @@ struct MemoryAnalyzer {
   }

   void Run();
+
   void Process(const Operation& op);
   void ProcessBlockTerminator(const Operation& op);
   void ProcessAllocation(const AllocateOp& alloc);
@@ -103,7 +110,7 @@ class MemoryOptimizationReducer : public Next {
         isolate_(std::get<MemoryOptimizationReducerArgs>(args).isolate) {}

   void Analyze() {
-    analyzer_.emplace(Asm().phase_zone(), Asm().input_graph());
+    analyzer_.emplace(Asm().phase_zone(), Asm().input_graph(), &Asm());
     analyzer_->Run();
     Next::Analyze();
   }
@@ -248,7 +255,7 @@ class MemoryOptimizationReducer : public Next {
   }

  private:
-  base::Optional<MemoryAnalyzer> analyzer_;
+  base::Optional<MemoryAnalyzer<typename Next::AssemblerType>> analyzer_;
   Isolate* isolate_;
   const TSCallDescriptor* allocate_builtin_descriptor_ = nullptr;
@@ -261,6 +268,161 @@ class MemoryOptimizationReducer : public Next {
   }
 };

+template <class Assembler>
+inline void MemoryAnalyzer<Assembler>::Run() {
+  block_states[current_block] = BlockState{};
+  BlockIndex end = BlockIndex(input_graph.block_count());
+  while (current_block < end) {
+    state = *block_states[current_block];
+    auto operations_range =
+        input_graph.operations(input_graph.Get(current_block));
+    // Set the next block index here already, to allow it to be changed if
+    // needed.
+    current_block = BlockIndex(current_block.id() + 1);
+    for (const Operation& op : operations_range) {
+      Process(op);
+    }
+  }
+}
+
+template <class Assembler>
+inline void MemoryAnalyzer<Assembler>::Process(const Operation& op) {
+  if (assembler->ShouldSkipOperation(op, input_graph.Index(op))) {
+    return;
+  }
+
+  if (auto* alloc = op.TryCast<AllocateOp>()) {
+    ProcessAllocation(*alloc);
+    return;
+  }
+  if (auto* store = op.TryCast<StoreOp>()) {
+    ProcessStore(input_graph.Index(op), store->base());
+    return;
+  }
+  OpProperties properties = op.Properties();
+  if (properties.can_allocate) {
+    state = BlockState();
+  }
+  if (properties.is_block_terminator) {
+    ProcessBlockTerminator(op);
+  }
+}
+
+// Update the successor block states based on the state of the current block.
+// For loop backedges, we need to re-start the analysis from the loop header
+// unless the backedge state is unchanged.
+template <class Assembler>
+inline void MemoryAnalyzer<Assembler>::ProcessBlockTerminator(
+    const Operation& op) {
+  if (auto* goto_op = op.TryCast<GotoOp>()) {
+    if (input_graph.IsLoopBackedge(*goto_op)) {
+      base::Optional<BlockState>& target_state =
+          block_states[goto_op->destination->index()];
+      BlockState old_state = *target_state;
+      MergeCurrentStateIntoSuccessor(goto_op->destination);
+      if (old_state != *target_state) {
+        // We can never fold allocations inside of the loop into an
+        // allocation before the loop, since this leads to unbounded
+        // allocation size. An unknown `reserved_size` will prevent adding
+        // allocations inside of the loop.
+        target_state->reserved_size = base::nullopt;
+        // Redo the analysis from the beginning of the loop.
+        current_block = goto_op->destination->index();
+      }
+      return;
+    } else if (goto_op->destination->IsLoop()) {
+      // Look ahead to detect allocating loops earlier, avoiding a wrong
+      // speculation resulting in processing the loop twice.
+      for (const Operation& op :
+           input_graph.operations(*goto_op->destination)) {
+        if (op.Properties().can_allocate &&
+            !assembler->ShouldSkipOperation(op, input_graph.Index(op))) {
+          state = BlockState();
+          break;
+        }
+      }
+    }
+  }
+  for (Block* successor : SuccessorBlocks(op)) {
+    MergeCurrentStateIntoSuccessor(successor);
+  }
+}
+
+// We try to merge the new allocation into a previous dominating allocation.
+// We also allow folding allocations across blocks, as long as there is a
+// dominating relationship.
+template <class Assembler>
+inline void MemoryAnalyzer<Assembler>::ProcessAllocation(
+    const AllocateOp& alloc) {
+  if (ShouldSkipOptimizationStep()) return;
+  base::Optional<uint64_t> new_size;
+  if (auto* size =
+          input_graph.Get(alloc.size()).template TryCast<ConstantOp>()) {
+    new_size = size->integral();
+  }
+  // If the new allocation has a static size and is of the same type, then we
+  // can fold it into the previous allocation unless the folded allocation would
+  // exceed `kMaxRegularHeapObjectSize`.
+  if (state.last_allocation && new_size.has_value() &&
+      state.reserved_size.has_value() &&
+      alloc.type == state.last_allocation->type &&
+      *new_size <= kMaxRegularHeapObjectSize - *state.reserved_size) {
+    state.reserved_size =
+        static_cast<uint32_t>(*state.reserved_size + *new_size);
+    folded_into[&alloc] = state.last_allocation;
+    uint32_t& max_reserved_size = reserved_size[state.last_allocation];
+    max_reserved_size = std::max(max_reserved_size, *state.reserved_size);
+    return;
+  }
+  state.last_allocation = &alloc;
+  state.reserved_size = base::nullopt;
+  if (new_size.has_value() && *new_size <= kMaxRegularHeapObjectSize) {
+    state.reserved_size = static_cast<uint32_t>(*new_size);
+  }
+  // We might be re-visiting the current block. In this case, we need to remove
+  // an allocation that can no longer be folded.
+  reserved_size.erase(&alloc);
+  folded_into.erase(&alloc);
+}
+
+template <class Assembler>
+inline void MemoryAnalyzer<Assembler>::ProcessStore(OpIndex store,
+                                                    OpIndex object) {
+  if (SkipWriteBarrier(input_graph.Get(object))) {
+    skipped_write_barriers.insert(store);
+  } else {
+    // We might be re-visiting the current block. In this case, we need to
+    // still update the information.
+    skipped_write_barriers.erase(store);
+  }
+}
+
+template <class Assembler>
+inline void MemoryAnalyzer<Assembler>::MergeCurrentStateIntoSuccessor(
+    const Block* successor) {
+  base::Optional<BlockState>& target_state = block_states[successor->index()];
+  if (!target_state.has_value()) {
+    target_state = state;
+    return;
+  }
+  // All predecessors need to have the same last allocation for us to continue
+  // folding into it.
+  if (target_state->last_allocation != state.last_allocation) {
+    target_state = BlockState();
+    return;
+  }
+  // We take the maximum allocation size of all predecessors. If the size is
+  // unknown because it is dynamic, we remember the allocation to eliminate
+  // write barriers.
+  if (target_state->reserved_size.has_value() &&
+      state.reserved_size.has_value()) {
+    target_state->reserved_size =
+        std::max(*target_state->reserved_size, *state.reserved_size);
+  } else {
+    target_state->reserved_size = base::nullopt;
+  }
+}
+
 }  // namespace v8::internal::compiler::turboshaft

 #endif  // V8_COMPILER_TURBOSHAFT_MEMORY_OPTIMIZATION_H_
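As a concrete walkthrough of the folding arithmetic in ProcessAllocation: a second allocation with a static size is folded into the dominating one as long as the combined reservation stays within kMaxRegularHeapObjectSize. A simplified standalone sketch; the constant's value below is an illustrative stand-in, and the same-type and dominance checks are omitted:

#include <cstdint>
#include <iostream>
#include <optional>

// Toy walkthrough of the reserved-size bookkeeping. The value of
// kMaxRegularHeapObjectSize here is an illustrative stand-in; the real
// constant lives in V8's heap layout headers.
constexpr uint32_t kMaxRegularHeapObjectSize = 131072;

struct BlockState {
  bool has_last_allocation = false;
  std::optional<uint32_t> reserved_size;
};

// Returns true if an allocation of {new_size} bytes can be folded into the
// dominating allocation given the current {state}.
bool TryFold(BlockState& state, uint64_t new_size) {
  if (state.has_last_allocation && state.reserved_size.has_value() &&
      new_size <= kMaxRegularHeapObjectSize - *state.reserved_size) {
    *state.reserved_size += static_cast<uint32_t>(new_size);
    return true;  // folded: the first allocation reserves the extra bytes
  }
  state.has_last_allocation = true;
  state.reserved_size =
      new_size <= kMaxRegularHeapObjectSize
          ? std::optional<uint32_t>(static_cast<uint32_t>(new_size))
          : std::nullopt;  // dynamic/huge size: folding stops here
  return false;  // a fresh allocation group starts at this operation
}

int main() {
  BlockState state;
  TryFold(state, 32);                // first allocation: reserves 32 bytes
  bool folded = TryFold(state, 48);  // folded: reservation grows to 80
  std::cout << folded << ' ' << *state.reserved_size << '\n';  // prints: 1 80
}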
src/compiler/turboshaft/optimization-phase.h

@@ -312,15 +312,14 @@ class GraphVisitor {
         assembler().output_graph().next_operation_index();
     USE(first_output_index);
     const Operation& op = input_graph().Get(index);
-    if (op.saturated_use_count == 0 &&
-        !op.Properties().is_required_when_unused) {
-      if constexpr (trace_reduction) TraceOperationUnused();
-      return true;
-    }
     if constexpr (trace_reduction) TraceReductionStart(index);
     OpIndex new_index;
     if (input_block->IsLoop() && op.Is<PhiOp>()) {
       const PhiOp& phi = op.Cast<PhiOp>();
+      if (assembler().ShouldSkipOperation(phi, index)) {
+        if constexpr (trace_reduction) TraceOperationSkipped();
+        return true;
+      }
       new_index = assembler().PendingLoopPhi(MapToNewGraph(phi.inputs()[0]),
                                              phi.rep, phi.inputs()[1]);
       CreateOldToNewMapping(index, new_index);

@@ -329,12 +328,16 @@ class GraphVisitor {
     }
   } else {
     switch (op.opcode) {
-#define EMIT_INSTR_CASE(Name)                                    \
-  case Opcode::k##Name:                                          \
-    new_index = this->Visit##Name(op.Cast<Name##Op>());          \
-    if (CanBeUsedAsInput(op.Cast<Name##Op>())) {                 \
-      CreateOldToNewMapping(index, new_index);                   \
-    }                                                            \
+#define EMIT_INSTR_CASE(Name)                                           \
+  case Opcode::k##Name:                                                 \
+    if (assembler().ShouldSkipOperation(op.Cast<Name##Op>(), index)) {  \
+      if constexpr (trace_reduction) TraceOperationSkipped();           \
+      return true;                                                      \
+    }                                                                   \
+    new_index = this->Visit##Name(op.Cast<Name##Op>());                 \
+    if (CanBeUsedAsInput(op.Cast<Name##Op>())) {                        \
+      CreateOldToNewMapping(index, new_index);                          \
+    }                                                                   \
     break;
       TURBOSHAFT_OPERATION_LIST(EMIT_INSTR_CASE)
 #undef EMIT_INSTR_CASE

@@ -351,7 +354,7 @@ class GraphVisitor {
         << PaddingSpace{5 - CountDecimalDigits(index.id())}
         << OperationPrintStyle{input_graph().Get(index), "#o"} << "\n";
   }
-  void TraceOperationUnused() { std::cout << "╰─> unused\n\n"; }
+  void TraceOperationSkipped() { std::cout << "╰─> skipped\n\n"; }
   void TraceBlockUnreachable() { std::cout << "╰─> unreachable\n\n"; }
   void TraceReductionResult(Block* current_block, OpIndex first_output_index,
                             OpIndex new_index) {