[turboshaft] port decompression optimization

Bug: v8:12783
Change-Id: Ib23aa682054bfcf35efe1adef64fc97afe8f9619
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3743642
Reviewed-by: Darius Mercadier <dmercadier@chromium.org>
Commit-Queue: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81615}
Tobias Tebbi 2022-07-08 12:44:56 +00:00, committed by V8 LUCI CQ
parent 8103fe573a
commit 9d45d274b1

7 changed files with 295 additions and 7 deletions
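
For context: with V8 pointer compression, tagged values are stored in the heap
as 32-bit offsets from a per-process "cage base", and a compressed value must
have that base added back before it can be dereferenced. A minimal sketch of
the idea, with hypothetical helper names (not V8's actual API):

    #include <cstdint>

    // Keep only the 32-bit offset within the pointer compression cage.
    uint32_t Compress(uintptr_t full_pointer) {
      return static_cast<uint32_t>(full_pointer);
    }

    // Re-attach the cage base to obtain a dereferenceable full pointer.
    uintptr_t Decompress(uintptr_t cage_base, uint32_t compressed) {
      return cage_base + compressed;
    }

This commit ports TurboFan's decompression optimization, which elides the
decompression step for values that are never dereferenced, to the new
Turboshaft graph.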

BUILD.bazel

@@ -2851,6 +2851,8 @@ filegroup(
"src/compiler/store-store-elimination.cc",
"src/compiler/store-store-elimination.h",
"src/compiler/turboshaft/assembler.h",
"src/compiler/turboshaft/decompression-optimization.cc",
"src/compiler/turboshaft/decompression-optimization.h",
"src/compiler/turboshaft/deopt-data.h",
"src/compiler/turboshaft/graph-builder.cc",
"src/compiler/turboshaft/graph-builder.h",

BUILD.gn

@@ -1525,6 +1525,7 @@ config("v8_gcov_coverage_cflags") {
cflags = [
"-fprofile-arcs",
"-ftest-coverage",
# We already block on gcc warnings on other bots. Let's not block here to
# always generate coverage reports.
"-Wno-error",
@@ -2932,6 +2933,7 @@ v8_header_set("v8_internal_headers") {
"src/compiler/state-values-utils.h",
"src/compiler/store-store-elimination.h",
"src/compiler/turboshaft/assembler.h",
"src/compiler/turboshaft/decompression-optimization.h",
"src/compiler/turboshaft/deopt-data.h",
"src/compiler/turboshaft/graph-builder.h",
"src/compiler/turboshaft/graph-visualizer.h",
@@ -4170,6 +4172,7 @@ v8_source_set("v8_turboshaft") {
visibility = [ ":*" ] # Only targets in this file can depend on this.
sources = [
"src/compiler/turboshaft/decompression-optimization.cc",
"src/compiler/turboshaft/graph-builder.cc",
"src/compiler/turboshaft/graph-visualizer.cc",
"src/compiler/turboshaft/graph.cc",
@@ -6205,9 +6208,7 @@ group("v8_clusterfuzz") {
group("v8_gcc_light") {
testonly = true
-  deps = [
-    ":d8",
-  ]
+  deps = [ ":d8" ]
}
group("v8_archive") {

src/compiler/pipeline.cc

@@ -78,6 +78,7 @@
#include "src/compiler/simplified-operator.h"
#include "src/compiler/store-store-elimination.h"
#include "src/compiler/turboshaft/assembler.h"
#include "src/compiler/turboshaft/decompression-optimization.h"
#include "src/compiler/turboshaft/graph-builder.h"
#include "src/compiler/turboshaft/graph-visualizer.h"
#include "src/compiler/turboshaft/graph.h"
@@ -176,7 +177,6 @@ class PipelineData {
assembler_options_(AssemblerOptions::Default(isolate)) {
PhaseScope scope(pipeline_statistics, "V8.TFInitPipelineData");
graph_ = graph_zone_->New<Graph>(graph_zone_);
-  turboshaft_graph_ = std::make_unique<turboshaft::Graph>(graph_zone_);
source_positions_ = graph_zone_->New<SourcePositionTable>(graph_);
node_origins_ = info->trace_turbo_json()
? graph_zone_->New<NodeOriginTable>(graph_)
@@ -350,6 +350,11 @@ class PipelineData {
Zone* graph_zone() const { return graph_zone_; }
Graph* graph() const { return graph_; }
void set_graph(Graph* graph) { graph_ = graph; }
void CreateTurboshaftGraph() {
DCHECK_NULL(turboshaft_graph_);
turboshaft_graph_ = std::make_unique<turboshaft::Graph>(graph_zone_);
}
bool HasTurboshaftGraph() const { return turboshaft_graph_ != nullptr; }
turboshaft::Graph& turboshaft_graph() const { return *turboshaft_graph_; }
SourcePositionTable* source_positions() const { return source_positions_; }
NodeOriginTable* node_origins() const { return node_origins_; }
@@ -2004,7 +2009,11 @@ struct DecompressionOptimizationPhase {
DECL_PIPELINE_PHASE_CONSTANTS(DecompressionOptimization)
void Run(PipelineData* data, Zone* temp_zone) {
-  if (COMPRESS_POINTERS_BOOL) {
+  if (!COMPRESS_POINTERS_BOOL) return;
+  if (data->HasTurboshaftGraph()) {
+    turboshaft::RunDecompressionOptimization(data->turboshaft_graph(),
+                                             temp_zone);
+  } else {
DecompressionOptimizer decompression_optimizer(
temp_zone, data->graph(), data->common(), data->machine());
decompression_optimizer.Reduce();
@@ -2028,6 +2037,7 @@ struct BuildTurboshaftPhase {
base::Optional<BailoutReason> Run(PipelineData* data, Zone* temp_zone) {
Schedule* schedule = data->schedule();
data->reset_schedule();
data->CreateTurboshaftGraph();
return turboshaft::BuildGraph(schedule, data->graph_zone(), temp_zone,
&data->turboshaft_graph(),
data->source_positions());
@@ -2928,8 +2938,10 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) {
Run<MachineOperatorOptimizationPhase>();
RunPrintAndVerify(MachineOperatorOptimizationPhase::phase_name(), true);
-  Run<DecompressionOptimizationPhase>();
-  RunPrintAndVerify(DecompressionOptimizationPhase::phase_name(), true);
+  if (!FLAG_turboshaft) {
+    Run<DecompressionOptimizationPhase>();
+    RunPrintAndVerify(DecompressionOptimizationPhase::phase_name(), true);
+  }
Run<BranchConditionDuplicationPhase>();
RunPrintAndVerify(BranchConditionDuplicationPhase::phase_name(), true);
@@ -2952,6 +2964,10 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) {
Run<OptimizeTurboshaftPhase>();
Run<PrintTurboshaftGraphPhase>(OptimizeTurboshaftPhase::phase_name());
Run<DecompressionOptimizationPhase>();
Run<PrintTurboshaftGraphPhase>(
DecompressionOptimizationPhase::phase_name());
Run<TurboshaftRecreateSchedulePhase>(linkage);
TraceSchedule(data->info(), data, data->schedule(),
TurboshaftRecreateSchedulePhase::phase_name());

src/compiler/turboshaft/decompression-optimization.cc (new file)

@@ -0,0 +1,221 @@
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/compiler/turboshaft/decompression-optimization.h"
#include "src/base/v8-fallthrough.h"
#include "src/codegen/machine-type.h"
#include "src/compiler/turboshaft/operations.h"
#include "src/compiler/turboshaft/optimization-phase.h"
namespace v8::internal::compiler::turboshaft {
namespace {
// Analyze the uses of values to determine if a compressed value has any uses
// that need it to be decompressed. Since this analysis looks at uses, we
// iterate the graph backwards, updating the analysis state for the inputs of an
// operation. Due to loop phis, we need to compute a fixed-point. Therefore, we
// re-visit the loop if a loop phi backedge changes something. As a performance
// optimization, we keep track of operations (`candidates`) that might need to
// be updated, so that we don't have to walk the whole graph again.
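// A hypothetical example of why the fixed point is needed (not taken from
// this commit): consider a loop phi `p` whose backedge input is a load `l`
// from inside the loop. Since we iterate backwards, the loop body (and thus
// `l`) is visited before the loop header's phi. If `p` turns out to need
// decompression, the backedge block has to be re-processed so that the
// requirement propagates to `l`.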
struct DecompressionAnalyzer : AnalyzerBase {
using Base = AnalyzerBase;
// We use `uint8_t` instead of `bool` here to avoid the bit-packed
// `std::vector<bool>` specialization.
FixedSidetable<uint8_t> needs_decompression;
ZoneVector<OpIndex> candidates;
DecompressionAnalyzer(const Graph& graph, Zone* phase_zone)
: AnalyzerBase(graph, phase_zone),
needs_decompression(graph.op_id_count(), phase_zone),
candidates(phase_zone) {
candidates.reserve(graph.op_id_count() / 8);
}
void Run() {
for (uint32_t next_block_id = graph.block_count() - 1; next_block_id > 0;) {
BlockIndex block_index = BlockIndex(next_block_id);
--next_block_id;
const Block& block = graph.Get(block_index);
if (block.IsLoop()) {
ProcessBlock<true>(block, &next_block_id);
} else {
ProcessBlock<false>(block, &next_block_id);
}
}
}
bool NeedsDecompression(OpIndex op) { return needs_decompression[op]; }
bool NeedsDecompression(const Operation& op) {
return NeedsDecompression(graph.Index(op));
}
bool MarkAsNeedsDecompression(OpIndex op) {
return needs_decompression[op] = true;
}
template <bool is_loop>
void ProcessBlock(const Block& block, uint32_t* next_block_id) {
for (const Operation& op : base::Reversed(graph.operations(block))) {
if (is_loop && op.Is<PhiOp>() && NeedsDecompression(op)) {
const PhiOp& phi = op.Cast<PhiOp>();
if (!NeedsDecompression(phi.input(1))) {
Block* backedge = block.LastPredecessor();
*next_block_id =
std::max<uint32_t>(*next_block_id, backedge->index().id());
}
}
ProcessOperation(op);
}
}
void ProcessOperation(const Operation& op);
};
void DecompressionAnalyzer::ProcessOperation(const Operation& op) {
switch (op.opcode) {
case Opcode::kStore: {
auto& store = op.Cast<StoreOp>();
MarkAsNeedsDecompression(store.base());
if (!IsAnyTagged(store.stored_rep))
MarkAsNeedsDecompression(store.value());
break;
}
case Opcode::kIndexedStore: {
auto& store = op.Cast<IndexedStoreOp>();
MarkAsNeedsDecompression(store.base());
MarkAsNeedsDecompression(store.index());
if (!IsAnyTagged(store.stored_rep))
MarkAsNeedsDecompression(store.value());
break;
}
case Opcode::kFrameState:
// The deopt code knows how to handle Compressed inputs, both
// MachineRepresentation kCompressed values and CompressedHeapConstants.
break;
case Opcode::kPhi: {
// Replicate the phi's state for its inputs.
auto& phi = op.Cast<PhiOp>();
if (NeedsDecompression(op)) {
for (OpIndex input : phi.inputs()) {
MarkAsNeedsDecompression(input);
}
} else {
candidates.push_back(graph.Index(op));
}
break;
}
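// 64-bit word operations consume their inputs as full machine words, so any
// tagged inputs they have must be kept in decompressed form; a compressed
// 32-bit value is not meaningful there.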
case Opcode::kEqual: {
auto& equal = op.Cast<EqualOp>();
if (equal.rep == MachineRepresentation::kWord64) {
MarkAsNeedsDecompression(equal.left());
MarkAsNeedsDecompression(equal.right());
}
break;
}
case Opcode::kComparison: {
auto& comp = op.Cast<ComparisonOp>();
if (comp.rep == MachineRepresentation::kWord64) {
MarkAsNeedsDecompression(comp.left());
MarkAsNeedsDecompression(comp.right());
}
break;
}
case Opcode::kBinop: {
auto& binary_op = op.Cast<BinopOp>();
if (binary_op.rep == MachineRepresentation::kWord64) {
MarkAsNeedsDecompression(binary_op.left());
MarkAsNeedsDecompression(binary_op.right());
}
break;
}
case Opcode::kShift: {
auto& shift_op = op.Cast<ShiftOp>();
if (shift_op.rep == MachineRepresentation::kWord64) {
MarkAsNeedsDecompression(shift_op.left());
}
break;
}
case Opcode::kChange: {
auto& change = op.Cast<ChangeOp>();
if (change.to == MachineRepresentation::kWord64 &&
NeedsDecompression(op)) {
MarkAsNeedsDecompression(change.input());
}
break;
}
case Opcode::kTaggedBitcast: {
auto& bitcast = op.Cast<TaggedBitcastOp>();
if (NeedsDecompression(op)) {
MarkAsNeedsDecompression(bitcast.input());
}
break;
}
case Opcode::kIndexedLoad:
case Opcode::kLoad:
case Opcode::kConstant:
if (!NeedsDecompression(op)) {
candidates.push_back(graph.Index(op));
}
V8_FALLTHROUGH;
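// Everything else is handled conservatively: all inputs are assumed to be
// used as full pointers and are therefore marked as needing decompression.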
default:
for (OpIndex input : op.inputs()) {
MarkAsNeedsDecompression(input);
}
break;
}
}
} // namespace
// Instead of using `OptimizationPhase`, we directly mutate the operations after
// the analysis. Doing it in-place is possible because we only modify operation
// options.
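// For example, a ConstantOp's kind changes from kHeapObject to
// kCompressedHeapObject, and a LoadOp's loaded representation from AnyTagged
// to AnyCompressed, while the operation's opcode and inputs stay the same.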
void RunDecompressionOptimization(Graph& graph, Zone* phase_zone) {
DecompressionAnalyzer analyzer(graph, phase_zone);
analyzer.Run();
for (OpIndex op_idx : analyzer.candidates) {
Operation& op = graph.Get(op_idx);
if (analyzer.NeedsDecompression(op)) continue;
switch (op.opcode) {
case Opcode::kConstant: {
auto& constant = op.Cast<ConstantOp>();
if (constant.kind == ConstantOp::Kind::kHeapObject) {
constant.kind = ConstantOp::Kind::kCompressedHeapObject;
}
break;
}
case Opcode::kPhi: {
auto& phi = op.Cast<PhiOp>();
if (phi.rep == MachineRepresentation::kTagged) {
phi.rep = MachineRepresentation::kCompressed;
} else if (phi.rep == MachineRepresentation::kTaggedPointer) {
phi.rep = MachineRepresentation::kCompressedPointer;
}
break;
}
case Opcode::kLoad: {
auto& load = op.Cast<LoadOp>();
if (load.loaded_rep == MachineType::AnyTagged()) {
load.loaded_rep = MachineType::AnyCompressed();
} else if (load.loaded_rep == MachineType::TaggedPointer()) {
load.loaded_rep = MachineType::CompressedPointer();
}
break;
}
case Opcode::kIndexedLoad: {
auto& load = op.Cast<IndexedLoadOp>();
if (load.loaded_rep == MachineType::AnyTagged()) {
load.loaded_rep = MachineType::AnyCompressed();
} else if (load.loaded_rep == MachineType::TaggedPointer()) {
load.loaded_rep = MachineType::CompressedPointer();
}
break;
}
default:
break;
}
}
}
} // namespace v8::internal::compiler::turboshaft

src/compiler/turboshaft/decompression-optimization.h (new file)

@@ -0,0 +1,25 @@
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_COMPILER_TURBOSHAFT_DECOMPRESSION_OPTIMIZATION_H_
#define V8_COMPILER_TURBOSHAFT_DECOMPRESSION_OPTIMIZATION_H_
namespace v8::internal {
class Zone;
}
namespace v8::internal::compiler::turboshaft {
class Graph;
// The purpose of decompression optimization is to avoid unnecessary pointer
// decompression operations. If a compressed value loaded from the heap is only
// used as a Smi or to store it back into the heap, then there is no need to add
// the root pointer to make it dereferenceable. By performing this optimization
// late in the pipeline, all the preceding phases can safely assume that
// everything is decompressed and do not need to worry about the distinction
// between compressed and uncompressed pointers.
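// As a hypothetical example, in a field-to-field copy `a.f = b.g`, the value
// loaded from `b.g` can be stored into `a.f` in compressed form; only the
// object pointers that are actually dereferenced (`a` and `b` themselves)
// have to be decompressed.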
void RunDecompressionOptimization(Graph& graph, Zone* phase_zone);
} // namespace v8::internal::compiler::turboshaft
#endif // V8_COMPILER_TURBOSHAFT_DECOMPRESSION_OPTIMIZATION_H_

src/compiler/turboshaft/graph.h

@@ -128,6 +128,7 @@ class OperationBuffer {
DCHECK_GT(operation_sizes_[idx.id()], 0);
OpIndex result = OpIndex(idx.offset() + operation_sizes_[idx.id()] *
sizeof(OperationStorageSlot));
DCHECK_LT(0, result.offset());
DCHECK_LE(result.offset(), capacity() * sizeof(OperationStorageSlot));
return result;
}
@@ -136,6 +137,7 @@ class OperationBuffer {
DCHECK_GT(operation_sizes_[idx.id() - 1], 0);
OpIndex result = OpIndex(idx.offset() - operation_sizes_[idx.id() - 1] *
sizeof(OperationStorageSlot));
DCHECK_LE(0, result.offset());
DCHECK_LT(result.offset(), capacity() * sizeof(OperationStorageSlot));
return result;
}

src/compiler/turboshaft/sidetable.h

@@ -66,6 +66,27 @@ class GrowingSidetable {
}
};
// A fixed-size sidetable mapping from `OpIndex` to `T`.
// Elements are default-initialized.
template <class T>
class FixedSidetable {
public:
explicit FixedSidetable(size_t size, Zone* zone) : table_(size, zone) {}
T& operator[](OpIndex op) {
DCHECK_LT(op.id(), table_.size());
return table_[op.id()];
}
const T& operator[](OpIndex op) const {
DCHECK_LT(op.id(), table_.size());
return table_[op.id()];
}
private:
ZoneVector<T> table_;
};
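// Example use (this is how the decompression analysis above uses it): one
// byte of analysis state per operation, indexed by `OpIndex`:
//   FixedSidetable<uint8_t> needs_decompression(graph.op_id_count(), zone);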
} // namespace v8::internal::compiler::turboshaft
#endif // V8_COMPILER_TURBOSHAFT_SIDETABLE_H_