[turboshaft] add basic optimization phase: liveness analysis
Bug: v8:12783
Change-Id: I15cf16bd66a97c33170ca4f1f5e3acc6ff9bf956
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3576129
Auto-Submit: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Nico Hartmann <nicohartmann@chromium.org>
Cr-Commit-Position: refs/heads/main@{#80618}
This commit is contained in:
parent de877f7497 · commit ecc0bc8f35
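In outline, the new phase is a classic dead-code elimination: a backward liveness pass marks every operation whose value or effect is needed, and a rebuild pass copies only the marked operations into a fresh graph. A minimal sketch of that shape (simplified, illustrative types and names, not the actual V8 classes):

```cpp
#include <cstdint>
#include <vector>

// Hypothetical, simplified operation record: real Turboshaft operations
// carry an opcode, properties, and a variable-length input array.
struct Op {
  bool required_when_unused;     // e.g. stores, calls, control flow
  std::vector<uint32_t> inputs;  // indices of producing operations
};

// Backward pass: an op is live if it must run for its effects or if a
// live op consumes its value. Ops are assumed to be in topological order
// (inputs come before uses), so one reverse sweep suffices for acyclic
// graphs; loops need revisiting, as the LivenessAnalyzer below does.
std::vector<uint8_t> ComputeLiveness(const std::vector<Op>& graph) {
  std::vector<uint8_t> live(graph.size(), 0);
  for (size_t i = graph.size(); i-- > 0;) {
    if (graph[i].required_when_unused) live[i] = 1;
    if (!live[i]) continue;
    for (uint32_t input : graph[i].inputs) live[input] = 1;
  }
  return live;
}
```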
BUILD.bazel
@@ -2828,6 +2828,8 @@ filegroup(
         "src/compiler/turboshaft/graph.h",
         "src/compiler/turboshaft/operations.cc",
         "src/compiler/turboshaft/operations.h",
+        "src/compiler/turboshaft/optimization-phase.cc",
+        "src/compiler/turboshaft/optimization-phase.h",
         "src/compiler/turboshaft/recreate-schedule.cc",
         "src/compiler/turboshaft/recreate-schedule.h",
         "src/compiler/type-cache.cc",
BUILD.gn (2 lines changed)
@@ -2905,6 +2905,7 @@ v8_header_set("v8_internal_headers") {
         "src/compiler/turboshaft/graph-builder.h",
         "src/compiler/turboshaft/graph.h",
         "src/compiler/turboshaft/operations.h",
+        "src/compiler/turboshaft/optimization-phase.h",
         "src/compiler/turboshaft/recreate-schedule.h",
         "src/compiler/type-cache.h",
         "src/compiler/type-narrowing-reducer.h",
@@ -4106,6 +4107,7 @@ v8_source_set("v8_turboshaft") {
         "src/compiler/turboshaft/graph-builder.cc",
         "src/compiler/turboshaft/graph.cc",
         "src/compiler/turboshaft/operations.cc",
+        "src/compiler/turboshaft/optimization-phase.cc",
         "src/compiler/turboshaft/recreate-schedule.cc",
       ]
src/compiler/pipeline.cc
@@ -78,6 +78,7 @@
 #include "src/compiler/turboshaft/assembler.h"
 #include "src/compiler/turboshaft/graph-builder.h"
 #include "src/compiler/turboshaft/graph.h"
+#include "src/compiler/turboshaft/optimization-phase.h"
 #include "src/compiler/turboshaft/recreate-schedule.h"
 #include "src/compiler/type-narrowing-reducer.h"
 #include "src/compiler/typed-optimization.h"
@@ -2011,7 +2012,7 @@ struct BranchConditionDuplicationPhase {
 };
 
 struct BuildTurboshaftPhase {
-  DECL_PIPELINE_PHASE_CONSTANTS(BuildTurboShaft)
+  DECL_PIPELINE_PHASE_CONSTANTS(BuildTurboshaft)
 
   void Run(PipelineData* data, Zone* temp_zone) {
     turboshaft::BuildGraph(data->schedule(), data->graph_zone(), temp_zone,
@@ -2020,6 +2021,16 @@ struct BuildTurboshaftPhase {
   }
 };
 
+struct OptimizeTurboshaftPhase {
+  DECL_PIPELINE_PHASE_CONSTANTS(OptimizeTurboshaft)
+
+  void Run(PipelineData* data, Zone* temp_zone) {
+    turboshaft::OptimizationPhase<
+        turboshaft::LivenessAnalyzer,
+        turboshaft::Assembler>::Run(&data->turboshaft_graph(), temp_zone);
+  }
+};
+
 struct TurboshaftRecreateSchedulePhase {
   DECL_PIPELINE_PHASE_CONSTANTS(TurboshaftRecreateSchedule)
 
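The phase is composed at compile time: the analyzer and assembler are template parameters rather than virtual interfaces, so the per-operation loop can inline the analyzer's `OpIsUsed`. Any type exposing the small interface of `AnalyzerBase` (constructor from graph and zone, `Run`, `OpIsUsed`) should plug in. A hypothetical analyzer, for illustration only (the name is invented; `AnalyzerBase` itself already behaves like this):

```cpp
// Hypothetical analyzer that keeps every operation alive; it satisfies
// the interface OptimizationPhase expects from its Analyzer parameter.
struct KeepEverythingAnalyzer : turboshaft::AnalyzerBase {
  using AnalyzerBase::AnalyzerBase;
  void Run() {}  // nothing to compute
  bool OpIsUsed(turboshaft::OpIndex) const { return true; }
};

// Usage, mirroring OptimizeTurboshaftPhase::Run:
// turboshaft::OptimizationPhase<KeepEverythingAnalyzer,
//                               turboshaft::Assembler>::Run(graph, zone);
```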
@@ -2860,10 +2871,12 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) {
     AllowHandleDereference allow_deref;
     CodeTracer::StreamScope tracing_scope(data->GetCodeTracer());
     tracing_scope.stream()
-        << "\n-- TurboShaft Graph ----------------------------\n"
+        << "\n-- Turboshaft Graph ----------------------------\n"
         << data->turboshaft_graph();
   }
 
+  Run<OptimizeTurboshaftPhase>();
+
   Run<TurboshaftRecreateSchedulePhase>(linkage);
   if (data->info()->trace_turbo_graph() || FLAG_trace_turbo_scheduler) {
     UnparkedScopeIfNeeded scope(data->broker());
src/compiler/turboshaft/graph-builder.cc
@@ -170,7 +170,8 @@ void GraphBuilder::Run() {
       DCHECK_EQ(block->SuccessorCount(), 1);
       Block* destination = Map(block->SuccessorAt(0));
       assembler.Goto(destination);
-      if (destination->IsLoop()) {
+      if (destination->IsBound()) {
+        DCHECK(destination->IsLoop());
         FixLoopPhis(destination, target_block);
       }
       break;
src/compiler/turboshaft/graph.h
@@ -22,7 +22,7 @@ namespace v8::internal::compiler::turboshaft {
 class Assembler;
 class VarAssembler;
 
-// `OperationBuffer` is a growable, Zone-allocated buffer to store TurboShaft
+// `OperationBuffer` is a growable, Zone-allocated buffer to store Turboshaft
 // operations. It is part of a `Graph`.
 // The buffer can be seen as an array of 8-byte `OperationStorageSlot` values.
 // The structure is append-only, that is, we only add operations at the end.
@@ -220,8 +220,19 @@ class Block {
     return result;
   }
 
   Block* LastPredecessor() const { return last_predecessor_; }
   Block* NeighboringPredecessor() const { return neighboring_predecessor_; }
   bool HasPredecessors() const { return last_predecessor_ != nullptr; }
 
+  // The block from the previous graph which produced the current block. This is
+  // used for translating phi nodes from the previous graph.
+  void SetOrigin(const Block* origin) {
+    DCHECK_NULL(origin_);
+    DCHECK_NE(origin->graph_, graph_);
+    origin_ = origin;
+  }
+  const Block* Origin() const { return origin_; }
+
   OpIndex begin() const {
     DCHECK(begin_.valid());
     return begin_;
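`Origin()` is what lets the rebuild pass match up predecessor lists when some predecessors were optimized away: `ReducePhi` in the new optimization-phase.h walks the old and new predecessor chains in lockstep and keeps a phi input only when `new_pred->Origin() == old_pred`. A toy rendering of that matching (hypothetical data, forward order instead of the reversed linked lists the real code walks):

```cpp
#include <vector>

// Toy version of the predecessor matching in ReducePhi: each new
// predecessor remembers which old predecessor it came from; inputs whose
// old predecessor has no surviving counterpart are dropped.
std::vector<int> FilterPhiInputs(const std::vector<int>& old_inputs,
                                 const std::vector<int>& old_preds,
                                 const std::vector<int>& new_pred_origins) {
  std::vector<int> result;
  size_t n = 0;  // cursor into new_pred_origins
  for (size_t o = 0; o < old_preds.size(); ++o) {
    if (n < new_pred_origins.size() && new_pred_origins[n] == old_preds[o]) {
      result.push_back(old_inputs[o]);  // predecessor survived: keep input
      ++n;
    }
  }
  return result;
}
```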
@@ -243,6 +254,7 @@ class Block {
   BlockIndex index_ = BlockIndex::Invalid();
   Block* last_predecessor_ = nullptr;
   Block* neighboring_predecessor_ = nullptr;
+  const Block* origin_ = nullptr;
 #ifdef DEBUG
   Graph* graph_ = nullptr;
 #endif
@@ -342,7 +354,7 @@ class Graph {
     return result;
   }
 
-  bool Add(Block* block) {
+  V8_INLINE bool Add(Block* block) {
     DCHECK_EQ(block->graph_, this);
     if (!bound_blocks_.empty() && !block->HasPredecessors()) return false;
     bool deferred = true;
@@ -435,12 +447,16 @@ class Graph {
 
   base::iterator_range<ConstOperationIterator> operations(OpIndex begin,
                                                           OpIndex end) const {
+    DCHECK(begin.valid());
+    DCHECK(end.valid());
     return {ConstOperationIterator(begin, this),
             ConstOperationIterator(end, this)};
   }
 
   base::iterator_range<MutableOperationIterator> operations(OpIndex begin,
                                                             OpIndex end) {
+    DCHECK(begin.valid());
+    DCHECK(end.valid());
     return {MutableOperationIterator(begin, this),
             MutableOperationIterator(end, this)};
   }
src/compiler/turboshaft/operations.h
@@ -220,7 +220,7 @@ struct OpProperties {
   }
 };
 
-// Baseclass for all TurboShaft operations.
+// Baseclass for all Turboshaft operations.
 // The `alignas(OpIndex)` is necessary because it is followed by an array of
 // `OpIndex` inputs.
 struct alignas(OpIndex) Operation {
@@ -308,24 +308,25 @@ struct OperationT : Operation {
 
   static constexpr OpProperties properties() { return Derived::properties; }
 
+  Derived& derived_this() { return *static_cast<Derived*>(this); }
+  const Derived& derived_this() const {
+    return *static_cast<const Derived*>(this);
+  }
+
   // Shadow Operation::inputs to exploit static knowledge about object size.
   base::Vector<OpIndex> inputs() {
     return {reinterpret_cast<OpIndex*>(reinterpret_cast<char*>(this) +
                                        sizeof(Derived)),
-            input_count};
+            derived_this().input_count};
   }
   base::Vector<const OpIndex> inputs() const {
     return {reinterpret_cast<const OpIndex*>(
                 reinterpret_cast<const char*>(this) + sizeof(Derived)),
-            input_count};
+            derived_this().input_count};
   }
 
-  V8_INLINE OpIndex& input(size_t i) {
-    return static_cast<Derived*>(this)->inputs()[i];
-  }
-  V8_INLINE OpIndex input(size_t i) const {
-    return static_cast<const Derived*>(this)->inputs()[i];
-  }
+  V8_INLINE OpIndex& input(size_t i) { return derived_this().inputs()[i]; }
+  V8_INLINE OpIndex input(size_t i) const { return derived_this().inputs()[i]; }
 
   static size_t StorageSlotCount(size_t input_count) {
     // The operation size in bytes is:
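`derived_this()` is the usual CRTP downcast; routing `input_count` through it matters because `FixedArityOperationT` (below) shadows the field with a `static constexpr` member, and name lookup through the derived type picks up the compile-time constant. A standalone sketch of the pattern, with toy names rather than the V8 types:

```cpp
#include <cstdint>
#include <cstdio>

template <class Derived>
struct OperationBase {
  uint16_t input_count;  // runtime value, set for variable-arity ops

  Derived& derived_this() { return *static_cast<Derived*>(this); }

  // Looks up input_count through the derived type: if Derived shadows it
  // with a static constexpr member, the constant is used and the compiler
  // can fold away the load.
  uint16_t EffectiveInputCount() { return derived_this().input_count; }
};

struct AddOp : OperationBase<AddOp> {
  static constexpr uint16_t input_count = 2;  // shadows the base field
};

int main() {
  AddOp add;
  std::printf("%d\n", add.EffectiveInputCount());  // prints 2
}
```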
@@ -373,8 +374,8 @@ struct OperationT : Operation {
 
   bool operator==(const Derived& other) const {
     const Derived& derived = *static_cast<const Derived*>(this);
-    if (derived.inputs() != other.inputs()) return false;
-    return derived.options() == other.options();
+    return derived.inputs() == other.inputs() &&
+           derived.options() == other.options();
   }
   size_t hash_value() const {
     const Derived& derived = *static_cast<const Derived*>(this);
@@ -382,7 +383,7 @@ struct OperationT : Operation {
   }
 
   void PrintOptions(std::ostream& os) const {
-    const auto& options = static_cast<const Derived*>(this)->options();
+    const auto& options = derived_this().options();
     constexpr size_t options_count =
         std::tuple_size<std::remove_reference_t<decltype(options)>>::value;
     if (options_count == 0) {
@@ -411,19 +412,8 @@ struct FixedArityOperationT : OperationT<Derived> {
   // Enable concise base access in derived struct.
   using Base = FixedArityOperationT;
 
-  // Shadow OperationT<Derived>::inputs to exploit static knowledge about input
-  // count.
+  // Shadow Operation::input_count to exploit static knowledge.
   static constexpr uint16_t input_count = InputCount;
-  base::Vector<OpIndex> inputs() {
-    return {reinterpret_cast<OpIndex*>(reinterpret_cast<char*>(this) +
-                                       sizeof(Derived)),
-            InputCount};
-  }
-  base::Vector<const OpIndex> inputs() const {
-    return {reinterpret_cast<const OpIndex*>(
-                reinterpret_cast<const char*>(this) + sizeof(Derived)),
-            InputCount};
-  }
 
   template <class... Args>
   explicit FixedArityOperationT(Args... args)
@@ -434,12 +424,6 @@ struct FixedArityOperationT : OperationT<Derived> {
     ((inputs[i++] = args), ...);
   }
 
-  bool operator==(const Derived& other) const {
-    return std::equal(inputs().begin(), inputs().end(),
-                      other.inputs().begin()) &&
-           static_cast<const Derived*>(this)->options() == other.options();
-  }
-
   // Redefine the input initialization to tell C++ about the static input size.
   template <class... Args>
   static Derived& New(Graph* graph, Args... args) {
@@ -695,6 +679,8 @@ struct PhiOp : OperationT<PhiOp> {
 
   static constexpr OpProperties properties = OpProperties::Pure();
 
+  static constexpr size_t kLoopPhiBackEdgeIndex = 1;
+
   explicit PhiOp(base::Vector<const OpIndex> inputs, MachineRepresentation rep)
       : Base(inputs), rep(rep) {}
   auto options() const { return std::tuple{rep}; }
@@ -705,7 +691,7 @@ struct PhiOp : OperationT<PhiOp> {
 struct PendingLoopPhiOp : FixedArityOperationT<1, PendingLoopPhiOp> {
   MachineRepresentation rep;
   union {
-    // Used when transforming a TurboShaft graph.
+    // Used when transforming a Turboshaft graph.
     // This is not an input because it refers to the old graph.
     OpIndex old_backedge_index = OpIndex::Invalid();
     // Used when translating from sea-of-nodes.
@@ -896,6 +882,8 @@ struct ConstantOp : FixedArityOperationT<0, ConstantOp> {
     }
   }
 
+  auto options() const { return std::tuple{kind, storage}; }
+
   void PrintOptions(std::ostream& os) const;
   size_t hash_value() const {
     switch (kind) {
@@ -985,7 +973,7 @@ struct IndexedLoadOp : FixedArityOperationT<2, IndexedLoadOp> {
         offset(offset) {}
   void PrintOptions(std::ostream& os) const;
   auto options() const {
-    return std::tuple{kind, loaded_rep, element_size_log2, offset};
+    return std::tuple{kind, loaded_rep, offset, element_size_log2};
   }
 };
 
@@ -1047,8 +1035,8 @@ struct IndexedStoreOp : FixedArityOperationT<3, IndexedStoreOp> {
         offset(offset) {}
   void PrintOptions(std::ostream& os) const;
   auto options() const {
-    return std::tuple{kind, stored_rep, write_barrier, element_size_log2,
-                      offset};
+    return std::tuple{kind, stored_rep, write_barrier, offset,
+                      element_size_log2};
   }
 };
 
src/compiler/turboshaft/optimization-phase.cc (new file, 26 lines)
@@ -0,0 +1,26 @@
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/turboshaft/optimization-phase.h"

namespace v8::internal::compiler::turboshaft {

int CountDecimalDigits(uint32_t value) {
  int result = 1;
  while (value > 9) {
    result++;
    value = value / 10;
  }
  return result;
}

std::ostream& operator<<(std::ostream& os, PaddingSpace padding) {
  if (padding.spaces > 10000) return os;
  for (int i = 0; i < padding.spaces; ++i) {
    os << ' ';
  }
  return os;
}

}  // namespace v8::internal::compiler::turboshaft
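These two helpers exist only for the tracing output in the header below: operation ids are padded to a five-column field so the printed operations line up. A quick check of the arithmetic, as a standalone snippet outside V8:

```cpp
#include <cstdint>
#include <cstdio>

// Same digit-count logic as CountDecimalDigits above.
static int Digits(uint32_t v) {
  int n = 1;
  while (v > 9) { ++n; v /= 10; }
  return n;
}

int main() {
  // PaddingSpace{5 - Digits(id)} pads "o<id>:" to a fixed width:
  // id 7 gets 4 spaces of padding; id 12345 gets none.
  std::printf("%d %d\n", 5 - Digits(7), 5 - Digits(12345));  // 4 0
}
```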
src/compiler/turboshaft/optimization-phase.h (new file, 400 lines)
@@ -0,0 +1,400 @@
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_COMPILER_TURBOSHAFT_OPTIMIZATION_PHASE_H_
#define V8_COMPILER_TURBOSHAFT_OPTIMIZATION_PHASE_H_

#include <utility>

#include "src/base/iterator.h"
#include "src/base/logging.h"
#include "src/base/small-vector.h"
#include "src/base/vector.h"
#include "src/compiler/turboshaft/graph.h"
#include "src/compiler/turboshaft/operations.h"

namespace v8::internal::compiler::turboshaft {

int CountDecimalDigits(uint32_t value);
struct PaddingSpace {
  int spaces;
};
std::ostream& operator<<(std::ostream& os, PaddingSpace padding);

struct AnalyzerBase {
  Zone* phase_zone;
  const Graph& graph;

  void Run() {}
  bool OpIsUsed(OpIndex i) const { return true; }

  explicit AnalyzerBase(const Graph& graph, Zone* phase_zone)
      : phase_zone(phase_zone), graph(graph) {}
};
struct LivenessAnalyzer : AnalyzerBase {
  using Base = AnalyzerBase;
  // Using `uint8_t` instead of `bool` prevents `std::vector` from using a
  // bitvector, which has worse performance.
  std::vector<uint8_t> op_used;

  LivenessAnalyzer(const Graph& graph, Zone* phase_zone)
      : AnalyzerBase(graph, phase_zone), op_used(graph.op_id_count(), false) {}

  bool OpIsUsed(OpIndex i) { return op_used[i.id()]; }

  void Run() {
    for (uint32_t unprocessed_count = graph.block_count();
         unprocessed_count > 0;) {
      BlockIndex block_index = static_cast<BlockIndex>(unprocessed_count - 1);
      --unprocessed_count;
      const Block& block = graph.Get(block_index);
      if (V8_UNLIKELY(block.IsLoop())) {
        ProcessBlock<true>(block, &unprocessed_count);
      } else {
        ProcessBlock<false>(block, &unprocessed_count);
      }
    }
  }

  template <bool is_loop>
  void ProcessBlock(const Block& block, uint32_t* unprocessed_count) {
    auto op_range = graph.operations(block);
    for (auto it = op_range.end(); it != op_range.begin();) {
      --it;
      OpIndex index = it.Index();
      const Operation& op = *it;
      if (op.properties().is_required_when_unused) {
        op_used[index.id()] = true;
      } else if (!OpIsUsed(index)) {
        continue;
      }
      if constexpr (is_loop) {
        if (op.Is<PhiOp>()) {
          const PhiOp& phi = op.Cast<PhiOp>();
          // Mark the loop backedge as used. Trigger a revisit if it wasn't
          // marked as used already.
          if (!OpIsUsed(phi.inputs()[PhiOp::kLoopPhiBackEdgeIndex])) {
            Block* backedge = block.LastPredecessor();
            // Revisit the loop by increasing the `unprocessed_count` to
            // include all blocks of the loop.
            *unprocessed_count =
                std::max(*unprocessed_count, backedge->index().id() + 1);
          }
        }
      }
      for (OpIndex input : op.inputs()) {
        op_used[input.id()] = true;
      }
    }
  }
};

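The reverse sweep visits blocks from the highest index down, which covers acyclic control flow in one pass because block indices are topologically ordered. Loops are the exception: a loop-header phi's backedge input only becomes known to be live after the loop body has been scanned, so when that happens `unprocessed_count` is bumped back up past the backedge block and the whole loop is scanned again. A toy model of just that revisit logic, with invented names and a hard-coded four-block CFG:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Toy model of LivenessAnalyzer::Run's revisit: blocks are numbered in
// topological order; a loop header re-queues its loop when its phi's
// backedge input was not yet marked live. Names are illustrative only.
struct ToyBlock {
  bool is_loop_header = false;
  uint32_t backedge_block = 0;  // highest block of the loop, for headers
};

int main() {
  // B0 -> B1 (loop header with a phi) -> B2 (body, backedge to B1) -> B3
  std::vector<ToyBlock> blocks(4);
  blocks[1] = {true, 2};

  bool backedge_input_marked = false;  // the op_used[] bit for the input
  int scans = 0;
  for (uint32_t unprocessed = blocks.size(); unprocessed > 0;) {
    uint32_t b = --unprocessed;
    ++scans;
    if (blocks[b].is_loop_header && !backedge_input_marked) {
      // The phi is live but its backedge input was never marked: mark it
      // and re-queue every block of the loop, exactly as Run() does via
      // *unprocessed_count = max(*unprocessed_count, backedge id + 1).
      backedge_input_marked = true;
      unprocessed = std::max(unprocessed, blocks[b].backedge_block + 1);
    }
  }
  std::printf("blocks scanned: %d\n", scans);  // 6: B3 B2 B1 B2 B1 B0
}
```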
template <class Analyzer, class Assembler>
class OptimizationPhase {
 private:
  struct Impl;

 public:
  static void Run(Graph* input, Zone* phase_zone) {
    Impl phase{*input, phase_zone};
    if (FLAG_turboshaft_trace_reduction) {
      phase.template Run<true>();
    } else {
      phase.template Run<false>();
    }
  }
  static void RunWithoutTracing(Graph* input, Zone* phase_zone) {
    Impl phase{*input, phase_zone};
    phase.template Run<false>();
  }
};

template <class Analyzer, class Assembler>
struct OptimizationPhase<Analyzer, Assembler>::Impl {
  Graph& input_graph;
  Zone* phase_zone;

  Analyzer analyzer{input_graph, phase_zone};
  Assembler assembler{&input_graph.GetOrCreateCompanion(), phase_zone};
  const Block* current_input_block = nullptr;
  // Mappings from the old graph to the new graph.
  std::vector<Block*> block_mapping{input_graph.block_count(), nullptr};
  std::vector<OpIndex> op_mapping{input_graph.op_id_count(),
                                  OpIndex::Invalid()};

  // `trace_reduction` is a template parameter to avoid paying for tracing at
  // runtime.
  template <bool trace_reduction>
  void Run() {
    analyzer.Run();

    for (const Block& input_block : input_graph.blocks()) {
      block_mapping[input_block.index().id()] =
          assembler.NewBlock(input_block.kind());
    }
    for (const Block& input_block : input_graph.blocks()) {
      current_input_block = &input_block;
      if constexpr (trace_reduction) {
        std::cout << PrintAsBlockHeader{input_block} << "\n";
      }
      if (!assembler.Bind(MapToNewGraph(input_block.index()))) {
        if constexpr (trace_reduction) TraceBlockUnreachable();
        continue;
      }
      assembler.current_block()->SetDeferred(input_block.IsDeferred());
      auto op_range = input_graph.operations(input_block);
      for (auto it = op_range.begin(); it != op_range.end(); ++it) {
        const Operation& op = *it;
        OpIndex index = it.Index();
        OpIndex first_output_index = assembler.graph().next_operation_index();
        if constexpr (trace_reduction) TraceReductionStart(index);
        if (!analyzer.OpIsUsed(index)) {
          if constexpr (trace_reduction) TraceOperationUnused();
          continue;
        }
        OpIndex new_index;
        if (input_block.IsLoop() && op.Is<PhiOp>()) {
          const PhiOp& phi = op.Cast<PhiOp>();
          new_index = assembler.PendingLoopPhi(MapToNewGraph(phi.inputs()[0]),
                                               phi.rep, phi.inputs()[1]);
          if constexpr (trace_reduction) {
            TraceReductionResult(first_output_index, new_index);
          }
        } else {
          switch (op.opcode) {
#define EMIT_INSTR_CASE(Name)                             \
  case Opcode::k##Name:                                   \
    new_index = this->Reduce##Name(op.Cast<Name##Op>());  \
    break;
            TURBOSHAFT_OPERATION_LIST(EMIT_INSTR_CASE)
#undef EMIT_INSTR_CASE
          }
          if constexpr (trace_reduction) {
            TraceReductionResult(first_output_index, new_index);
          }
        }
        op_mapping[index.id()] = new_index;
      }
      if constexpr (trace_reduction) TraceBlockFinished();
    }
    input_graph.SwapWithCompanion();
  }

  void TraceReductionStart(OpIndex index) {
    std::cout << "╭── o" << index.id() << ": "
              << PaddingSpace{5 - CountDecimalDigits(index.id())}
              << OperationPrintStyle{input_graph.Get(index), "#o"} << "\n";
  }
  void TraceOperationUnused() { std::cout << "╰─> unused\n\n"; }
  void TraceBlockUnreachable() { std::cout << "╰─> unreachable\n\n"; }
  void TraceReductionResult(OpIndex first_output_index, OpIndex new_index) {
    if (new_index < first_output_index) {
      // The operation was replaced with an already existing one.
      std::cout << "╰─> #n" << new_index.id() << "\n";
    }
    bool before_arrow = new_index >= first_output_index;
    for (const Operation& op : assembler.graph().operations(
             first_output_index, assembler.graph().next_operation_index())) {
      OpIndex index = assembler.graph().Index(op);
      const char* prefix;
      if (index == new_index) {
        prefix = "╰─>";
        before_arrow = false;
      } else if (before_arrow) {
        prefix = "│  ";
      } else {
        prefix = "   ";
      }
      std::cout << prefix << " n" << index.id() << ": "
                << PaddingSpace{5 - CountDecimalDigits(index.id())}
                << OperationPrintStyle{assembler.graph().Get(index), "#n"}
                << "\n";
    }
    std::cout << "\n";
  }
  void TraceBlockFinished() { std::cout << "\n"; }

  // These functions take an operation from the old graph and use the assembler
  // to emit a corresponding operation in the new graph, translating inputs and
  // blocks accordingly.

  V8_INLINE OpIndex ReduceGoto(const GotoOp& op) {
    Block* destination = MapToNewGraph(op.destination->index());
    if (destination->IsBound()) {
      DCHECK(destination->IsLoop());
      FixLoopPhis(destination);
    }
    assembler.current_block()->SetOrigin(current_input_block);
    return assembler.Goto(destination);
  }
  V8_INLINE OpIndex ReduceBranch(const BranchOp& op) {
    Block* if_true = MapToNewGraph(op.if_true->index());
    Block* if_false = MapToNewGraph(op.if_false->index());
    return assembler.Branch(MapToNewGraph(op.condition()), if_true, if_false);
  }
  OpIndex ReduceSwitch(const SwitchOp& op) {
    base::SmallVector<SwitchOp::Case, 16> cases;
    for (SwitchOp::Case c : op.cases) {
      cases.emplace_back(c.value, MapToNewGraph(c.destination->index()));
    }
    return assembler.Switch(
        MapToNewGraph(op.input()),
        assembler.graph_zone()->CloneVector(base::VectorOf(cases)),
        MapToNewGraph(op.default_case->index()));
  }
  OpIndex ReducePhi(const PhiOp& op) {
    base::Vector<const OpIndex> old_inputs = op.inputs();
    base::SmallVector<OpIndex, 8> new_inputs;
    Block* old_pred = current_input_block->LastPredecessor();
    Block* new_pred = assembler.current_block()->LastPredecessor();
    // Control predecessors might be missing after the optimization phase. So
    // we need to skip phi inputs that belong to control predecessors that
    // have no equivalent in the new graph. We do, however, assume that the
    // order of control predecessors did not change.
    for (OpIndex input : base::Reversed(old_inputs)) {
      if (new_pred->Origin() == old_pred) {
        new_inputs.push_back(MapToNewGraph(input));
        new_pred = new_pred->NeighboringPredecessor();
      }
      old_pred = old_pred->NeighboringPredecessor();
    }
    DCHECK_NULL(old_pred);
    DCHECK_NULL(new_pred);
    std::reverse(new_inputs.begin(), new_inputs.end());
    return assembler.Phi(base::VectorOf(new_inputs), op.rep);
  }
  OpIndex ReducePendingLoopPhi(const PendingLoopPhiOp& op) { UNREACHABLE(); }
  V8_INLINE OpIndex ReduceFrameState(const FrameStateOp& op) {
    auto inputs = MapToNewGraph<32>(op.inputs());
    return assembler.FrameState(base::VectorOf(inputs), op.inlined, op.data);
  }
  OpIndex ReduceCall(const CallOp& op) {
    OpIndex callee = MapToNewGraph(op.callee());
    auto arguments = MapToNewGraph<16>(op.arguments());
    return assembler.Call(callee, base::VectorOf(arguments), op.descriptor);
  }
  OpIndex ReduceReturn(const ReturnOp& op) {
    auto inputs = MapToNewGraph<4>(op.inputs());
    return assembler.Return(base::VectorOf(inputs), op.pop_count);
  }
  OpIndex ReduceOverflowCheckedBinop(const OverflowCheckedBinopOp& op) {
    return assembler.OverflowCheckedBinop(
        MapToNewGraph(op.left()), MapToNewGraph(op.right()), op.kind, op.rep);
  }
  OpIndex ReduceFloatUnary(const FloatUnaryOp& op) {
    return assembler.FloatUnary(MapToNewGraph(op.input()), op.kind, op.rep);
  }
  OpIndex ReduceShift(const ShiftOp& op) {
    return assembler.Shift(MapToNewGraph(op.left()), MapToNewGraph(op.right()),
                           op.kind, op.rep);
  }
  OpIndex ReduceEqual(const EqualOp& op) {
    return assembler.Equal(MapToNewGraph(op.left()), MapToNewGraph(op.right()),
                           op.rep);
  }
  OpIndex ReduceComparison(const ComparisonOp& op) {
    return assembler.Comparison(MapToNewGraph(op.left()),
                                MapToNewGraph(op.right()), op.kind, op.rep);
  }
  OpIndex ReduceChange(const ChangeOp& op) {
    return assembler.Change(MapToNewGraph(op.input()), op.kind, op.from, op.to);
  }
  OpIndex ReduceTaggedBitcast(const TaggedBitcastOp& op) {
    return assembler.TaggedBitcast(MapToNewGraph(op.input()), op.from, op.to);
  }
  OpIndex ReduceConstant(const ConstantOp& op) {
    return assembler.Constant(op.kind, op.storage);
  }
  OpIndex ReduceLoad(const LoadOp& op) {
    return assembler.Load(MapToNewGraph(op.base()), op.kind, op.loaded_rep,
                          op.offset);
  }
  OpIndex ReduceIndexedLoad(const IndexedLoadOp& op) {
    return assembler.IndexedLoad(
        MapToNewGraph(op.base()), MapToNewGraph(op.index()), op.kind,
        op.loaded_rep, op.offset, op.element_size_log2);
  }
  OpIndex ReduceStore(const StoreOp& op) {
    return assembler.Store(MapToNewGraph(op.base()), MapToNewGraph(op.value()),
                           op.kind, op.stored_rep, op.write_barrier, op.offset);
  }
  OpIndex ReduceIndexedStore(const IndexedStoreOp& op) {
    return assembler.IndexedStore(
        MapToNewGraph(op.base()), MapToNewGraph(op.index()),
        MapToNewGraph(op.value()), op.kind, op.stored_rep, op.write_barrier,
        op.offset, op.element_size_log2);
  }
  OpIndex ReduceParameter(const ParameterOp& op) {
    return assembler.Parameter(op.parameter_index, op.debug_name);
  }
  OpIndex ReduceStackPointerGreaterThan(const StackPointerGreaterThanOp& op) {
    return assembler.StackPointerGreaterThan(MapToNewGraph(op.stack_limit()),
                                             op.kind);
  }
  OpIndex ReduceLoadStackCheckOffset(const LoadStackCheckOffsetOp& op) {
    return assembler.LoadStackCheckOffset();
  }
  OpIndex ReduceCheckLazyDeopt(const CheckLazyDeoptOp& op) {
    return assembler.CheckLazyDeopt(MapToNewGraph(op.call()),
                                    MapToNewGraph(op.frame_state()));
  }
  OpIndex ReduceDeoptimize(const DeoptimizeOp& op) {
    return assembler.Deoptimize(MapToNewGraph(op.frame_state()), op.parameters);
  }
  OpIndex ReduceDeoptimizeIf(const DeoptimizeIfOp& op) {
    return assembler.DeoptimizeIf(MapToNewGraph(op.condition()),
                                  MapToNewGraph(op.frame_state()), op.negated,
                                  op.parameters);
  }
  OpIndex ReduceProjection(const ProjectionOp& op) {
    return assembler.Projection(MapToNewGraph(op.input()), op.kind);
  }
  OpIndex ReduceBinop(const BinopOp& op) {
    return assembler.Binop(MapToNewGraph(op.left()), MapToNewGraph(op.right()),
                           op.kind, op.rep);
  }
  OpIndex ReduceUnreachable(const UnreachableOp& op) {
    return assembler.Unreachable();
  }

  OpIndex MapToNewGraph(OpIndex old_index) {
    OpIndex result = op_mapping[old_index.id()];
    DCHECK(result.valid());
    return result;
  }

  template <size_t expected_size>
  base::SmallVector<OpIndex, expected_size> MapToNewGraph(
      base::Vector<const OpIndex> inputs) {
    base::SmallVector<OpIndex, expected_size> result;
    for (OpIndex input : inputs) {
      result.push_back(MapToNewGraph(input));
    }
    return result;
  }

  Block* MapToNewGraph(BlockIndex old_index) {
    Block* result = block_mapping[old_index.id()];
    DCHECK_NOT_NULL(result);
    return result;
  }

  void FixLoopPhis(Block* loop) {
    DCHECK(loop->IsLoop());
    for (Operation& op : assembler.graph().operations(*loop)) {
      if (auto* pending_phi = op.TryCast<PendingLoopPhiOp>()) {
        assembler.graph().template Replace<PhiOp>(
            assembler.graph().Index(*pending_phi),
            base::VectorOf({pending_phi->first(),
                            MapToNewGraph(pending_phi->old_backedge_index)}),
            pending_phi->rep);
      }
    }
  }
};

}  // namespace v8::internal::compiler::turboshaft

#endif  // V8_COMPILER_TURBOSHAFT_OPTIMIZATION_PHASE_H_
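The `switch` in `Impl::Run()` above is generated by the `TURBOSHAFT_OPERATION_LIST` X-macro; each opcode stamps out one case. For the `Goto` entry, for instance, `EMIT_INSTR_CASE(Goto)` expands to:

```cpp
case Opcode::kGoto:
  new_index = this->ReduceGoto(op.Cast<GotoOp>());
  break;
```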
src/compiler/turboshaft/recreate-schedule.cc
@@ -106,7 +106,7 @@ RecreateScheduleResult ScheduleBuilder::Run() {
   DCHECK_GE(input_graph.block_count(), 1);
   // The schedule needs to contain an dummy end block because the register
   // allocator expects this. This block is not actually reachable with control
-  // flow. It is added here because the TurboShaft grahp doesn't contain such a
+  // flow. It is added here because the Turboshaft grahp doesn't contain such a
   // block.
   blocks.reserve(input_graph.block_count() + 1);
   blocks.push_back(current_block);
src/flags/flag-definitions.h
@@ -966,7 +966,9 @@ DEFINE_FLOAT(script_delay_fraction, 0.0,
              "busy wait after each Script::Run by the given fraction of the "
              "run's duration")
 
-DEFINE_BOOL(turboshaft, false, "enable TurboFan's TurboShaft phases")
+DEFINE_BOOL(turboshaft, false, "enable TurboFan's Turboshaft phases")
+DEFINE_BOOL(turboshaft_trace_reduction, false,
+            "trace individual Turboshaft reduction steps")
 
 // Favor memory over execution speed.
 DEFINE_BOOL(optimize_for_size, false,
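With both flags defined, the new phase and its tracing can be exercised from the shell; V8 accepts flag names with dashes on the command line, so an invocation would look something like this (the script name is just a placeholder):

```sh
# Illustrative: run a script with the Turboshaft pipeline and per-step tracing.
d8 --turboshaft --turboshaft-trace-reduction test.js
```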
src/logging/runtime-call-stats.h
@@ -369,7 +369,8 @@ class RuntimeCallTimer final {
   ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, SimplifiedLowering)          \
   ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, StoreStoreElimination)      \
   ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, TraceScheduleAndVerify)     \
-  ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, BuildTurboShaft)            \
+  ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, BuildTurboshaft)            \
+  ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, OptimizeTurboshaft)         \
   ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, TurboshaftRecreateSchedule) \
   ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, TypeAssertions)             \
   ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, TypedLowering)              \