[turboshaft] port decompression optimization
Bug: v8:12783
Change-Id: Ib23aa682054bfcf35efe1adef64fc97afe8f9619
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3743642
Reviewed-by: Darius Mercadier <dmercadier@chromium.org>
Commit-Queue: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81615}
This commit is contained in:
parent
8103fe573a
commit
9d45d274b1
@ -2851,6 +2851,8 @@ filegroup(
|
||||
"src/compiler/store-store-elimination.cc",
|
||||
"src/compiler/store-store-elimination.h",
|
||||
"src/compiler/turboshaft/assembler.h",
|
||||
"src/compiler/turboshaft/decompression-optimization.cc",
|
||||
"src/compiler/turboshaft/decompression-optimization.h",
|
||||
"src/compiler/turboshaft/deopt-data.h",
|
||||
"src/compiler/turboshaft/graph-builder.cc",
|
||||
"src/compiler/turboshaft/graph-builder.h",
|
||||
|
7
BUILD.gn
7
BUILD.gn
@ -1525,6 +1525,7 @@ config("v8_gcov_coverage_cflags") {
|
||||
cflags = [
|
||||
"-fprofile-arcs",
|
||||
"-ftest-coverage",
|
||||
|
||||
# We already block on gcc warnings on other bots. Let's not block here to
|
||||
# always generate coverage reports.
|
||||
"-Wno-error",
|
||||
@ -2932,6 +2933,7 @@ v8_header_set("v8_internal_headers") {
|
||||
"src/compiler/state-values-utils.h",
|
||||
"src/compiler/store-store-elimination.h",
|
||||
"src/compiler/turboshaft/assembler.h",
|
||||
"src/compiler/turboshaft/decompression-optimization.h",
|
||||
"src/compiler/turboshaft/deopt-data.h",
|
||||
"src/compiler/turboshaft/graph-builder.h",
|
||||
"src/compiler/turboshaft/graph-visualizer.h",
|
||||
@ -4170,6 +4172,7 @@ v8_source_set("v8_turboshaft") {
|
||||
visibility = [ ":*" ] # Only targets in this file can depend on this.
|
||||
|
||||
sources = [
|
||||
"src/compiler/turboshaft/decompression-optimization.cc",
|
||||
"src/compiler/turboshaft/graph-builder.cc",
|
||||
"src/compiler/turboshaft/graph-visualizer.cc",
|
||||
"src/compiler/turboshaft/graph.cc",
|
||||
@ -6205,9 +6208,7 @@ group("v8_clusterfuzz") {
|
||||
group("v8_gcc_light") {
|
||||
testonly = true
|
||||
|
||||
deps = [
|
||||
":d8",
|
||||
]
|
||||
deps = [ ":d8" ]
|
||||
}
|
||||
|
||||
group("v8_archive") {
|
||||
|
@ -78,6 +78,7 @@
|
||||
#include "src/compiler/simplified-operator.h"
|
||||
#include "src/compiler/store-store-elimination.h"
|
||||
#include "src/compiler/turboshaft/assembler.h"
|
||||
#include "src/compiler/turboshaft/decompression-optimization.h"
|
||||
#include "src/compiler/turboshaft/graph-builder.h"
|
||||
#include "src/compiler/turboshaft/graph-visualizer.h"
|
||||
#include "src/compiler/turboshaft/graph.h"
|
||||
@ -176,7 +177,6 @@ class PipelineData {
|
||||
assembler_options_(AssemblerOptions::Default(isolate)) {
|
||||
PhaseScope scope(pipeline_statistics, "V8.TFInitPipelineData");
|
||||
graph_ = graph_zone_->New<Graph>(graph_zone_);
|
||||
turboshaft_graph_ = std::make_unique<turboshaft::Graph>(graph_zone_);
|
||||
source_positions_ = graph_zone_->New<SourcePositionTable>(graph_);
|
||||
node_origins_ = info->trace_turbo_json()
|
||||
? graph_zone_->New<NodeOriginTable>(graph_)
|
||||
@ -350,6 +350,11 @@ class PipelineData {
|
||||
Zone* graph_zone() const { return graph_zone_; }
|
||||
Graph* graph() const { return graph_; }
|
||||
void set_graph(Graph* graph) { graph_ = graph; }
|
||||
// Creates the Turboshaft graph and stores it in `turboshaft_graph_`, passing
// `graph_zone_` to the graph's constructor. DCHECK_NULL asserts that no
// Turboshaft graph has been created before this call.
void CreateTurboshaftGraph() {
|
||||
DCHECK_NULL(turboshaft_graph_);
|
||||
turboshaft_graph_ = std::make_unique<turboshaft::Graph>(graph_zone_);
|
||||
}
|
||||
// Returns true iff `turboshaft_graph_` currently holds a graph (is non-null).
bool HasTurboshaftGraph() const { return turboshaft_graph_ != nullptr; }
|
||||
// Returns the Turboshaft graph by reference. This dereferences
// `turboshaft_graph_` unconditionally, so it must only be called while
// `turboshaft_graph_` is non-null.
turboshaft::Graph& turboshaft_graph() const { return *turboshaft_graph_; }
|
||||
SourcePositionTable* source_positions() const { return source_positions_; }
|
||||
NodeOriginTable* node_origins() const { return node_origins_; }
|
||||
@ -2004,7 +2009,11 @@ struct DecompressionOptimizationPhase {
|
||||
DECL_PIPELINE_PHASE_CONSTANTS(DecompressionOptimization)
|
||||
|
||||
void Run(PipelineData* data, Zone* temp_zone) {
|
||||
if (COMPRESS_POINTERS_BOOL) {
|
||||
if (!COMPRESS_POINTERS_BOOL) return;
|
||||
if (data->HasTurboshaftGraph()) {
|
||||
turboshaft::RunDecompressionOptimization(data->turboshaft_graph(),
|
||||
temp_zone);
|
||||
} else {
|
||||
DecompressionOptimizer decompression_optimizer(
|
||||
temp_zone, data->graph(), data->common(), data->machine());
|
||||
decompression_optimizer.Reduce();
|
||||
@ -2028,6 +2037,7 @@ struct BuildTurboshaftPhase {
|
||||
base::Optional<BailoutReason> Run(PipelineData* data, Zone* temp_zone) {
|
||||
Schedule* schedule = data->schedule();
|
||||
data->reset_schedule();
|
||||
data->CreateTurboshaftGraph();
|
||||
return turboshaft::BuildGraph(schedule, data->graph_zone(), temp_zone,
|
||||
&data->turboshaft_graph(),
|
||||
data->source_positions());
|
||||
@ -2928,8 +2938,10 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) {
|
||||
Run<MachineOperatorOptimizationPhase>();
|
||||
RunPrintAndVerify(MachineOperatorOptimizationPhase::phase_name(), true);
|
||||
|
||||
Run<DecompressionOptimizationPhase>();
|
||||
RunPrintAndVerify(DecompressionOptimizationPhase::phase_name(), true);
|
||||
if (!FLAG_turboshaft) {
|
||||
Run<DecompressionOptimizationPhase>();
|
||||
RunPrintAndVerify(DecompressionOptimizationPhase::phase_name(), true);
|
||||
}
|
||||
|
||||
Run<BranchConditionDuplicationPhase>();
|
||||
RunPrintAndVerify(BranchConditionDuplicationPhase::phase_name(), true);
|
||||
@ -2952,6 +2964,10 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) {
|
||||
Run<OptimizeTurboshaftPhase>();
|
||||
Run<PrintTurboshaftGraphPhase>(OptimizeTurboshaftPhase::phase_name());
|
||||
|
||||
Run<DecompressionOptimizationPhase>();
|
||||
Run<PrintTurboshaftGraphPhase>(
|
||||
DecompressionOptimizationPhase::phase_name());
|
||||
|
||||
Run<TurboshaftRecreateSchedulePhase>(linkage);
|
||||
TraceSchedule(data->info(), data, data->schedule(),
|
||||
TurboshaftRecreateSchedulePhase::phase_name());
|
||||
|
221
src/compiler/turboshaft/decompression-optimization.cc
Normal file
221
src/compiler/turboshaft/decompression-optimization.cc
Normal file
@ -0,0 +1,221 @@
|
||||
// Copyright 2022 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/compiler/turboshaft/decompression-optimization.h"
|
||||
|
||||
#include "src/base/v8-fallthrough.h"
|
||||
#include "src/codegen/machine-type.h"
|
||||
#include "src/compiler/turboshaft/operations.h"
|
||||
#include "src/compiler/turboshaft/optimization-phase.h"
|
||||
|
||||
namespace v8::internal::compiler::turboshaft {
|
||||
|
||||
namespace {
|
||||
|
||||
// Analyze the uses of values to determine if a compressed value has any uses
|
||||
// that need it to be decompressed. Since this analysis looks at uses, we
|
||||
// iterate the graph backwards, updating the analysis state for the inputs of an
|
||||
// operation. Due to loop phis, we need to compute a fixed-point. Therefore, we
|
||||
// re-visit the loop if a loop phi backedge changes something. As a performance
|
||||
// optimization, we keep track of operations (`candidates`) that need to be
|
||||
// updated potentially, so that we don't have to walk the whole graph again.
|
||||
struct DecompressionAnalyzer : AnalyzerBase {
|
||||
using Base = AnalyzerBase;
|
||||
// We use `uint8_t` instead of `bool` here to avoid the bitvector optimization
|
||||
// of std::vector.
|
||||
FixedSidetable<uint8_t> needs_decompression;
|
||||
ZoneVector<OpIndex> candidates;
|
||||
|
||||
DecompressionAnalyzer(const Graph& graph, Zone* phase_zone)
|
||||
: AnalyzerBase(graph, phase_zone),
|
||||
needs_decompression(graph.op_id_count(), phase_zone),
|
||||
candidates(phase_zone) {
|
||||
candidates.reserve(graph.op_id_count() / 8);
|
||||
}
|
||||
|
||||
void Run() {
|
||||
for (uint32_t next_block_id = graph.block_count() - 1; next_block_id > 0;) {
|
||||
BlockIndex block_index = BlockIndex(next_block_id);
|
||||
--next_block_id;
|
||||
const Block& block = graph.Get(block_index);
|
||||
if (block.IsLoop()) {
|
||||
ProcessBlock<true>(block, &next_block_id);
|
||||
} else {
|
||||
ProcessBlock<false>(block, &next_block_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool NeedsDecompression(OpIndex op) { return needs_decompression[op]; }
|
||||
bool NeedsDecompression(const Operation& op) {
|
||||
return NeedsDecompression(graph.Index(op));
|
||||
}
|
||||
bool MarkAsNeedsDecompression(OpIndex op) {
|
||||
return needs_decompression[op] = true;
|
||||
}
|
||||
|
||||
template <bool is_loop>
|
||||
void ProcessBlock(const Block& block, uint32_t* next_block_id) {
|
||||
for (const Operation& op : base::Reversed(graph.operations(block))) {
|
||||
if (is_loop && op.Is<PhiOp>() && NeedsDecompression(op)) {
|
||||
const PhiOp& phi = op.Cast<PhiOp>();
|
||||
if (!NeedsDecompression(phi.input(1))) {
|
||||
Block* backedge = block.LastPredecessor();
|
||||
*next_block_id =
|
||||
std::max<uint32_t>(*next_block_id, backedge->index().id());
|
||||
}
|
||||
}
|
||||
ProcessOperation(op);
|
||||
}
|
||||
}
|
||||
void ProcessOperation(const Operation& op);
|
||||
};
|
||||
|
||||
void DecompressionAnalyzer::ProcessOperation(const Operation& op) {
|
||||
switch (op.opcode) {
|
||||
case Opcode::kStore: {
|
||||
auto& store = op.Cast<StoreOp>();
|
||||
MarkAsNeedsDecompression(store.base());
|
||||
if (!IsAnyTagged(store.stored_rep))
|
||||
MarkAsNeedsDecompression(store.value());
|
||||
break;
|
||||
}
|
||||
case Opcode::kIndexedStore: {
|
||||
auto& store = op.Cast<IndexedStoreOp>();
|
||||
MarkAsNeedsDecompression(store.base());
|
||||
MarkAsNeedsDecompression(store.index());
|
||||
if (!IsAnyTagged(store.stored_rep))
|
||||
MarkAsNeedsDecompression(store.value());
|
||||
break;
|
||||
}
|
||||
case Opcode::kFrameState:
|
||||
// The deopt code knows how to handle Compressed inputs, both
|
||||
// MachineRepresentation kCompressed values and CompressedHeapConstants.
|
||||
break;
|
||||
case Opcode::kPhi: {
|
||||
// Replicate the phi's state for its inputs.
|
||||
auto& phi = op.Cast<PhiOp>();
|
||||
if (NeedsDecompression(op)) {
|
||||
for (OpIndex input : phi.inputs()) {
|
||||
MarkAsNeedsDecompression(input);
|
||||
}
|
||||
} else {
|
||||
candidates.push_back(graph.Index(op));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Opcode::kEqual: {
|
||||
auto& equal = op.Cast<EqualOp>();
|
||||
if (equal.rep == MachineRepresentation::kWord64) {
|
||||
MarkAsNeedsDecompression(equal.left());
|
||||
MarkAsNeedsDecompression(equal.right());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Opcode::kComparison: {
|
||||
auto& comp = op.Cast<ComparisonOp>();
|
||||
if (comp.rep == MachineRepresentation::kWord64) {
|
||||
MarkAsNeedsDecompression(comp.left());
|
||||
MarkAsNeedsDecompression(comp.right());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Opcode::kBinop: {
|
||||
auto& binary_op = op.Cast<BinopOp>();
|
||||
if (binary_op.rep == MachineRepresentation::kWord64) {
|
||||
MarkAsNeedsDecompression(binary_op.left());
|
||||
MarkAsNeedsDecompression(binary_op.right());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Opcode::kShift: {
|
||||
auto& shift_op = op.Cast<ShiftOp>();
|
||||
if (shift_op.rep == MachineRepresentation::kWord64) {
|
||||
MarkAsNeedsDecompression(shift_op.left());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Opcode::kChange: {
|
||||
auto& change = op.Cast<ChangeOp>();
|
||||
if (change.to == MachineRepresentation::kWord64 &&
|
||||
NeedsDecompression(op)) {
|
||||
MarkAsNeedsDecompression(change.input());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Opcode::kTaggedBitcast: {
|
||||
auto& bitcast = op.Cast<TaggedBitcastOp>();
|
||||
if (NeedsDecompression(op)) {
|
||||
MarkAsNeedsDecompression(bitcast.input());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Opcode::kIndexedLoad:
|
||||
case Opcode::kLoad:
|
||||
case Opcode::kConstant:
|
||||
if (!NeedsDecompression(op)) {
|
||||
candidates.push_back(graph.Index(op));
|
||||
}
|
||||
V8_FALLTHROUGH;
|
||||
default:
|
||||
for (OpIndex input : op.inputs()) {
|
||||
MarkAsNeedsDecompression(input);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Instead of using `OptimizationPhase`, we directly mutate the operations after
|
||||
// the analysis. Doing it in-place is possible because we only modify operation
|
||||
// options.
|
||||
void RunDecompressionOptimization(Graph& graph, Zone* phase_zone) {
|
||||
DecompressionAnalyzer analyzer(graph, phase_zone);
|
||||
analyzer.Run();
|
||||
for (OpIndex op_idx : analyzer.candidates) {
|
||||
Operation& op = graph.Get(op_idx);
|
||||
if (analyzer.NeedsDecompression(op)) continue;
|
||||
switch (op.opcode) {
|
||||
case Opcode::kConstant: {
|
||||
auto& constant = op.Cast<ConstantOp>();
|
||||
if (constant.kind == ConstantOp::Kind::kHeapObject) {
|
||||
constant.kind = ConstantOp::Kind::kCompressedHeapObject;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Opcode::kPhi: {
|
||||
auto& phi = op.Cast<PhiOp>();
|
||||
if (phi.rep == MachineRepresentation::kTagged) {
|
||||
phi.rep = MachineRepresentation::kCompressed;
|
||||
} else if (phi.rep == MachineRepresentation::kTaggedPointer) {
|
||||
phi.rep = MachineRepresentation::kCompressedPointer;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Opcode::kLoad: {
|
||||
auto& load = op.Cast<LoadOp>();
|
||||
if (load.loaded_rep == MachineType::AnyTagged()) {
|
||||
load.loaded_rep = MachineType::AnyCompressed();
|
||||
} else if (load.loaded_rep == MachineType::TaggedPointer()) {
|
||||
load.loaded_rep = MachineType::CompressedPointer();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Opcode::kIndexedLoad: {
|
||||
auto& load = op.Cast<IndexedLoadOp>();
|
||||
if (load.loaded_rep == MachineType::AnyTagged()) {
|
||||
load.loaded_rep = MachineType::AnyCompressed();
|
||||
} else if (load.loaded_rep == MachineType::TaggedPointer()) {
|
||||
load.loaded_rep = MachineType::CompressedPointer();
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace v8::internal::compiler::turboshaft
|
25
src/compiler/turboshaft/decompression-optimization.h
Normal file
25
src/compiler/turboshaft/decompression-optimization.h
Normal file
@ -0,0 +1,25 @@
|
||||
// Copyright 2022 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_COMPILER_TURBOSHAFT_DECOMPRESSION_OPTIMIZATION_H_
|
||||
#define V8_COMPILER_TURBOSHAFT_DECOMPRESSION_OPTIMIZATION_H_
|
||||
|
||||
namespace v8::internal {
|
||||
class Zone;
|
||||
}
|
||||
namespace v8::internal::compiler::turboshaft {
|
||||
class Graph;
|
||||
|
||||
// The purpose of decompression optimization is to avoid unnecessary pointer
|
||||
// decompression operations. If a compressed value loaded from the heap is only
|
||||
// used as a Smi or to store it back into the heap, then there is no need to add
|
||||
// the root pointer to make it dereferenceable. By performing this optimization
|
||||
// late in the pipeline, all the preceding phases can safely assume that
|
||||
// everything is decompressed and do not need to worry about the distinction
|
||||
// between compressed and uncompressed pointers.
|
||||
// `graph` is analyzed and then modified in place. `phase_zone` is handed to
// the analysis for its temporary data structures.
void RunDecompressionOptimization(Graph& graph, Zone* phase_zone);
|
||||
|
||||
} // namespace v8::internal::compiler::turboshaft
|
||||
|
||||
#endif // V8_COMPILER_TURBOSHAFT_DECOMPRESSION_OPTIMIZATION_H_
|
@ -128,6 +128,7 @@ class OperationBuffer {
|
||||
DCHECK_GT(operation_sizes_[idx.id()], 0);
|
||||
OpIndex result = OpIndex(idx.offset() + operation_sizes_[idx.id()] *
|
||||
sizeof(OperationStorageSlot));
|
||||
DCHECK_LT(0, result.offset());
|
||||
DCHECK_LE(result.offset(), capacity() * sizeof(OperationStorageSlot));
|
||||
return result;
|
||||
}
|
||||
@ -136,6 +137,7 @@ class OperationBuffer {
|
||||
DCHECK_GT(operation_sizes_[idx.id() - 1], 0);
|
||||
OpIndex result = OpIndex(idx.offset() - operation_sizes_[idx.id() - 1] *
|
||||
sizeof(OperationStorageSlot));
|
||||
DCHECK_LE(0, result.offset());
|
||||
DCHECK_LT(result.offset(), capacity() * sizeof(OperationStorageSlot));
|
||||
return result;
|
||||
}
|
||||
|
@ -66,6 +66,27 @@ class GrowingSidetable {
|
||||
}
|
||||
};
|
||||
|
||||
// A fixed-size sidetable mapping from `OpIndex` to `T`.
|
||||
// Elements are default-initialized.
|
||||
template <class T>
|
||||
class FixedSidetable {
|
||||
public:
|
||||
explicit FixedSidetable(size_t size, Zone* zone) : table_(size, zone) {}
|
||||
|
||||
T& operator[](OpIndex op) {
|
||||
DCHECK_LT(op.id(), table_.size());
|
||||
return table_[op.id()];
|
||||
}
|
||||
|
||||
const T& operator[](OpIndex op) const {
|
||||
DCHECK_LT(op.id(), table_.size());
|
||||
return table_[op.id()];
|
||||
}
|
||||
|
||||
private:
|
||||
ZoneVector<T> table_;
|
||||
};
|
||||
|
||||
} // namespace v8::internal::compiler::turboshaft
|
||||
|
||||
#endif // V8_COMPILER_TURBOSHAFT_SIDETABLE_H_
|
||||
|
Loading…
Reference in New Issue
Block a user