From 5bff98a677267f6235aa26e36421215a26142b49 Mon Sep 17 00:00:00 2001 From: Tobias Tebbi Date: Wed, 25 Jan 2023 15:47:53 +0100 Subject: [PATCH] [turboshaft] simplify block inlining This removes the `direct_input` option from `CloneAndInlineBlock`, which is now unnecessary thanks to the fix in https://chromium-review.googlesource.com/c/v8/v8/+/4191773 In addition, this simplifies the code of `CloneAndInlineBlock` by reducing the usage of assembler-global variables and by moving the phi logic into the same function. The creation of variables is now controlled by `current_block_needs_variables_` alone. To set this remaining global flag in a scoped fashion, this also adds the helper class `ScopedModification` (which is used to simplify `ReentrantScope` too). Bug: v8:12783 Change-Id: I979b110ea10921477efc4bf2c38bd56b5c573442 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4194203 Reviewed-by: Maya Lekova Commit-Queue: Tobias Tebbi Cr-Commit-Position: refs/heads/main@{#85496} --- .../turboshaft/assert-types-reducer.h | 11 +--- .../turboshaft/branch-elimination-reducer.h | 4 +- src/compiler/turboshaft/graph.h | 60 ------------------- src/compiler/turboshaft/optimization-phase.h | 40 +++++-------- src/compiler/turboshaft/utils.h | 19 ++++++ 5 files changed, 37 insertions(+), 97 deletions(-) diff --git a/src/compiler/turboshaft/assert-types-reducer.h b/src/compiler/turboshaft/assert-types-reducer.h index 7a59addcb3..adabcbd12b 100644 --- a/src/compiler/turboshaft/assert-types-reducer.h +++ b/src/compiler/turboshaft/assert-types-reducer.h @@ -23,16 +23,11 @@ namespace v8::internal::compiler::turboshaft { class DetectReentranceScope { public: - explicit DetectReentranceScope(bool* flag) - : is_reentrant_(*flag), flag_(flag) { - *flag_ = true; - } - ~DetectReentranceScope() { *flag_ = is_reentrant_; } - bool IsReentrant() const { return is_reentrant_; } + explicit DetectReentranceScope(bool* flag) : set_flag_(flag, true) {} + bool IsReentrant() const { return 
set_flag_.old_value(); } private: - bool is_reentrant_; - bool* flag_; + ScopedModification<bool> set_flag_; }; struct AssertTypesReducerArgs { diff --git a/src/compiler/turboshaft/branch-elimination-reducer.h b/src/compiler/turboshaft/branch-elimination-reducer.h index c013074212..b117c4ff6b 100644 --- a/src/compiler/turboshaft/branch-elimination-reducer.h +++ b/src/compiler/turboshaft/branch-elimination-reducer.h @@ -324,12 +324,10 @@ class BranchEliminationReducer : public Next { // The destination block in the old graph ends with a Return // and the old destination is a merge block, so we can directly // inline the destination block in place of the Goto. - // We pass `false` to `direct_input` here, as we're looking one - // block ahead of the current one. // TODO(nicohartmann@): Temporarily disable this "optimization" because it // prevents dead code elimination in some cases. Reevaluate this and // reenable if phases have been reordered properly. - // Asm().CloneAndInlineBlock(old_dst, false); + // Asm().CloneAndInlineBlock(old_dst); // return OpIndex::Invalid(); } diff --git a/src/compiler/turboshaft/graph.h b/src/compiler/turboshaft/graph.h index b4b90b3c91..f7696e8335 100644 --- a/src/compiler/turboshaft/graph.h +++ b/src/compiler/turboshaft/graph.h @@ -330,66 +330,6 @@ class Block : public RandomAccessStackDominatorNode<Block> { return pred_count - pred_reverse_index - 1; } - // Returns the index of {target} in the predecessors of the current Block - // and if target is not in the list of predecessors, recursively traverses - // the predecessor graph up. - int GetAnyPredecessorIndex(const Block* target, Zone* zone) const { - int pred_count = 0; - int pred_reverse_index = -1; - // The tuple contains (predecessor, index_in_successor, successor). 
- ZoneQueue<std::tuple<Block*, int, const Block*>> to_visit(zone); - for (Block* pred = last_predecessor_; pred != nullptr; - pred = pred->neighboring_predecessor_) { - if (pred == target) { - DCHECK_EQ(pred_reverse_index, -1); - pred_reverse_index = pred_count; - } else { - to_visit.push(std::make_tuple(pred, pred_count, this)); - } - pred_count++; - } - if (pred_reverse_index != -1) { - // The target was found in the predecessors list. - return pred_count - pred_reverse_index - 1; - } - - const Block* current = nullptr; - int pred_index = -1; - const Block* successor = nullptr; - // The target is not a direct predecessor of the current block. - while (!to_visit.empty()) { - std::tie(current, pred_index, successor) = to_visit.front(); - to_visit.pop(); - if (current == target) { - DCHECK_EQ(pred_reverse_index, -1); - pred_reverse_index = pred_index; - break; - } else { - int pred_count = 0; - for (Block* pred = current->last_predecessor_; pred != nullptr; - pred = pred->neighboring_predecessor_) { - to_visit.push(std::make_tuple(pred, pred_count, current)); - pred_count++; - } - } - } - if (pred_reverse_index == -1) { - // Target was not found by the BFS above. - return -1; - } - - // The target was found in the predecessors list, we need to reconstruct - // the predecessors count for the current (found) Block's successor. - DCHECK_NOT_NULL(current); - DCHECK_NOT_NULL(successor); - pred_count = 0; - for (Block* pred = successor->last_predecessor_; pred != nullptr; - pred = pred->neighboring_predecessor_) { - pred_count++; - } - return pred_count - pred_reverse_index - 1; - } - // HasExactlyNPredecessors(n) returns the same result as // `PredecessorCount() == n`, but stops early and iterates at most the first // {n} predecessors. 
diff --git a/src/compiler/turboshaft/optimization-phase.h b/src/compiler/turboshaft/optimization-phase.h index ff2b9f6882..99c190a8ef 100644 --- a/src/compiler/turboshaft/optimization-phase.h +++ b/src/compiler/turboshaft/optimization-phase.h @@ -154,7 +154,6 @@ class GraphVisitor { current_input_block_(nullptr), block_mapping_(input_graph.block_count(), nullptr, phase_zone), op_mapping_(input_graph.op_id_count(), OpIndex::Invalid(), phase_zone), - visiting_cloned_block_(false), blocks_needing_variables(phase_zone), old_opindex_to_variables(input_graph.op_id_count(), phase_zone) { output_graph_.Reset(); @@ -212,17 +211,12 @@ class GraphVisitor { Graph& modifiable_input_graph() const { return input_graph_; } // Visits and emits {input_block} right now (ie, in the current block). - void CloneAndInlineBlock(const Block* input_block, bool direct_input = true) { + void CloneAndInlineBlock(const Block* input_block) { // Computing which input of Phi operations to use when visiting // {input_block} (since {input_block} doesn't really have predecessors // anymore). - if (direct_input) { - added_block_phi_input_ = input_block->GetPredecessorIndex( - assembler().current_block()->Origin()); - } else { - added_block_phi_input_ = input_block->GetAnyPredecessorIndex( - assembler().current_block()->Origin(), phase_zone_); - } + int added_block_phi_input = + input_block->GetPredecessorIndex(assembler().current_block()->Origin()); // There is no guarantees that {input_block} will be entirely removed just // because it's cloned/inlined, since it's possible that it has predecessors @@ -237,11 +231,18 @@ class GraphVisitor { // still properly be done (in OptimizationPhase::ReducePhi). 
assembler().current_block()->SetOrigin(input_block); - visiting_cloned_block_ = true; + ScopedModification<bool> set_true(&current_block_needs_variables_, true); for (OpIndex index : input_graph().OperationIndices(*input_block)) { - if (!VisitOp(index, input_block)) break; + if (const PhiOp* phi = + input_graph().Get(index).template TryCast<PhiOp>()) { + // This Phi has been cloned/inlined, and has thus now a single + // predecessor, and shouldn't be a Phi anymore. + CreateOldToNewMapping(index, + MapToNewGraph(phi->input(added_block_phi_input))); + } else { + if (!VisitOp(index, input_block)) break; + } } - visiting_cloned_block_ = false; } template @@ -456,11 +457,6 @@ class GraphVisitor { op.input(PhiOp::kLoopPhiBackEdgeIndex)); } - if (visiting_cloned_block_) { - // This Phi has been cloned/inlined, and has thus now a single - // predecessor, and shouldn't be a Phi anymore. - return MapToNewGraph(op.input(added_block_phi_input_)); - } base::Vector<const OpIndex> old_inputs = op.inputs(); base::SmallVector<OpIndex, 16> new_inputs; int predecessor_count = assembler().current_block()->PredecessorCount(); @@ -744,7 +740,7 @@ class GraphVisitor { } void CreateOldToNewMapping(OpIndex old_index, OpIndex new_index) { - if (visiting_cloned_block_ || current_block_needs_variables_) { + if (current_block_needs_variables_) { MaybeVariable var = GetVariableFor(old_index); if (!var.has_value()) { base::Optional<RegisterRepresentation> rep = @@ -816,14 +812,6 @@ class GraphVisitor { ZoneVector<Block*> block_mapping_; ZoneVector<OpIndex> op_mapping_; - // {visiting_cloned_block_} is set to true when cloning a block, which impacts - // how Phis are reduced, and how mappings from old to new OpIndex are - // maintained. - bool visiting_cloned_block_ = false; - // When visiting a cloned block, the {added_block_phi_input_}th of Phis is - // used to replace those Phis. - int added_block_phi_input_; - // {current_block_needs_variables_} is set to true if the current block should - // use Variables to map old to new OpIndex rather than just {op_mapping}. 
This // is typically the case when the block has been cloned. diff --git a/src/compiler/turboshaft/utils.h b/src/compiler/turboshaft/utils.h index 5ef2a67ee8..62711415b6 100644 --- a/src/compiler/turboshaft/utils.h +++ b/src/compiler/turboshaft/utils.h @@ -82,6 +82,25 @@ bool ShouldSkipOptimizationStep(); inline bool ShouldSkipOptimizationStep() { return false; } #endif +// Set `*ptr` to `new_value` while the scope is active, reset to the previous +// value upon destruction. +template <class T> +class ScopedModification { + public: + ScopedModification(T* ptr, T new_value) + : ptr_(ptr), old_value_(std::move(*ptr)) { + *ptr = std::move(new_value); + } + + ~ScopedModification() { *ptr_ = std::move(old_value_); } + + const T& old_value() const { return old_value_; } + + private: + T* ptr_; + T old_value_; +}; + } // namespace v8::internal::compiler::turboshaft #endif // V8_COMPILER_TURBOSHAFT_UTILS_H_