[compiler] Simplify "==0" branches in MachineOperatorReducer

Bug: v8:12484
Change-Id: I0667c7464c0dd71338bc199a24a69248a7a0a525
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3497303
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Owners-Override: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Darius Mercadier <dmercadier@chromium.org>
Cr-Commit-Position: refs/heads/main@{#79379}
This commit is contained in:
Darius M 2022-03-04 20:28:42 +01:00 committed by V8 LUCI CQ
parent 683a4f6b89
commit 48b443f692
8 changed files with 319 additions and 3 deletions

View File

@ -2536,6 +2536,8 @@ filegroup(
"src/compiler/backend/unwinding-info-writer.h",
"src/compiler/basic-block-instrumentor.cc",
"src/compiler/basic-block-instrumentor.h",
"src/compiler/branch-condition-duplicator.cc",
"src/compiler/branch-condition-duplicator.h",
"src/compiler/branch-elimination.cc",
"src/compiler/branch-elimination.h",
"src/compiler/bytecode-analysis.cc",

View File

@ -2776,6 +2776,7 @@ v8_header_set("v8_internal_headers") {
"src/compiler/backend/spill-placer.h",
"src/compiler/backend/unwinding-info-writer.h",
"src/compiler/basic-block-instrumentor.h",
"src/compiler/branch-condition-duplicator.h",
"src/compiler/branch-elimination.h",
"src/compiler/bytecode-analysis.h",
"src/compiler/bytecode-graph-builder.h",
@ -3874,6 +3875,7 @@ v8_compiler_sources = [
"src/compiler/backend/register-allocator.cc",
"src/compiler/backend/spill-placer.cc",
"src/compiler/basic-block-instrumentor.cc",
"src/compiler/branch-condition-duplicator.cc",
"src/compiler/branch-elimination.cc",
"src/compiler/bytecode-analysis.cc",
"src/compiler/bytecode-graph-builder.cc",

View File

@ -0,0 +1,83 @@
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/compiler/branch-condition-duplicator.h"
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/graph.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/opcodes.h"
namespace v8 {
namespace internal {
namespace compiler {
namespace {
bool IsBranch(Node* node) { return node->opcode() == IrOpcode::kBranch; }
bool CanDuplicate(Node* node) {
if (node->op()->EffectOutputCount() > 0) {
return false;
}
if (node->op()->ControlOutputCount() > 0) {
return false;
}
switch (node->opcode()) {
case IrOpcode::kProjection:
case IrOpcode::kParameter:
case IrOpcode::kOsrValue:
return false;
default:
break;
}
return true;
}
} // namespace
Node* BranchConditionDuplicator::DuplicateNode(Node* node) {
return graph_->CloneNode(node);
}
void BranchConditionDuplicator::DuplicateConditionIfNeeded(Node* node) {
if (!IsBranch(node)) return;
Node* condNode = node->InputAt(0);
if (condNode->UseCount() > 1 && CanDuplicate(condNode)) {
node->ReplaceInput(0, DuplicateNode(condNode));
}
}
void BranchConditionDuplicator::Enqueue(Node* node) {
if (seen_.Get(node)) return;
seen_.Set(node, true);
to_visit_.push(node);
}
void BranchConditionDuplicator::VisitNode(Node* node) {
DuplicateConditionIfNeeded(node);
for (int i = 0; i < node->op()->ControlInputCount(); i++) {
Enqueue(NodeProperties::GetControlInput(node, i));
}
}
void BranchConditionDuplicator::ProcessGraph() {
Enqueue(graph_->end());
while (!to_visit_.empty()) {
Node* node = to_visit_.front();
to_visit_.pop();
VisitNode(node);
}
}
BranchConditionDuplicator::BranchConditionDuplicator(Zone* zone, Graph* graph)
: graph_(graph), to_visit_(zone), seen_(graph, 2) {}
void BranchConditionDuplicator::Reduce() { ProcessGraph(); }
} // namespace compiler
} // namespace internal
} // namespace v8

View File

@ -0,0 +1,85 @@
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_COMPILER_BRANCH_CONDITION_DUPLICATOR_H_
#define V8_COMPILER_BRANCH_CONDITION_DUPLICATOR_H_
#include "src/base/macros.h"
#include "src/compiler/node-marker.h"
#include "src/compiler/node.h"
#include "src/zone/zone.h"
namespace v8 {
namespace internal {
namespace compiler {
// Forward declare.
class Graph;
// BranchConditionDuplicator makes sure that the condition nodes of branches are
// used only once. When it finds a branch node whose condition has multiples
// uses, this condition is duplicated.
//
// Doing this enables the InstructionSelector to generate more efficient code
// for branches. For instance, consider this code:
//
// if (a + b == 0) { /* some code */ }
// if (a + b == 0) { /* more code */ }
//
// Then the generated code will be something like (using registers "ra" for "a"
// and "rb" for "b", and "rt" a temporary register):
//
// add ra, rb ; a + b
// cmp ra, 0 ; (a + b) == 0
// sete rt ; rt = (a + b) == 0
// cmp rt, 0 ; rt == 0
// jz
// ...
// cmp rt, 0 ; rt == 0
// jz
//
// As you can see, TurboFan materialized the == bit into a temporary register.
// However, since the "add" instruction sets the ZF flag (on x64), it can be
// used to determine wether the jump should be taken or not. The code we'd like
// to generate instead if thus:
//
// add ra, rb
// jnz
// ...
// add ra, rb
// jnz
//
// However, this requires to generate twice the instruction "add ra, rb". Due to
// how virtual registers are assigned in TurboFan (there is a map from node ID
// to virtual registers), both "add" instructions will use the same virtual
// register as output, which will break SSA.
//
// In order to overcome this issue, BranchConditionDuplicator duplicates branch
// conditions that are used more than once, so that they can be generated right
// before each branch without worrying about breaking SSA.
class V8_EXPORT_PRIVATE BranchConditionDuplicator final {
public:
BranchConditionDuplicator(Zone* zone, Graph* graph);
~BranchConditionDuplicator() = default;
void Reduce();
Node* DuplicateNode(Node* node);
void DuplicateConditionIfNeeded(Node* node);
void Enqueue(Node* node);
void VisitNode(Node* node);
void ProcessGraph();
private:
Graph* const graph_;
ZoneQueue<Node*> to_visit_;
NodeMarker<bool> seen_;
};
} // namespace compiler
} // namespace internal
} // namespace v8
#endif // V8_COMPILER_BRANCH_CONDITION_DUPLICATOR_H_

View File

@ -3,15 +3,19 @@
// found in the LICENSE file.
#include "src/compiler/machine-operator-reducer.h"
#include <cmath>
#include <limits>
#include "src/base/bits.h"
#include "src/base/division-by-constant.h"
#include "src/base/ieee754.h"
#include "src/base/logging.h"
#include "src/base/overflowing-math.h"
#include "src/codegen/tnode.h"
#include "src/compiler/diamond.h"
#include "src/compiler/graph.h"
#include "src/compiler/js-operator.h"
#include "src/compiler/machine-graph.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
@ -2135,6 +2139,124 @@ Reduction MachineOperatorReducer::ReduceFloat64RoundDown(Node* node) {
return NoChange();
}
namespace {
// Returns true if |node| is a constant whose value is 0.
bool IsZero(Node* node) {
switch (node->opcode()) {
#define CASE_IS_ZERO(opcode, matcher) \
case IrOpcode::opcode: { \
matcher m(node); \
return m.Is(0); \
}
CASE_IS_ZERO(kInt32Constant, Int32Matcher)
CASE_IS_ZERO(kInt64Constant, Int64Matcher)
#undef CASE_IS_ZERO
default:
break;
}
return false;
}
// If |node| is of the form "x == 0", then return "x" (in order to remove the
// "== 0" part).
base::Optional<Node*> TryGetInvertedCondition(Node* cond) {
if (cond->opcode() == IrOpcode::kWord32Equal) {
Int32BinopMatcher m(cond);
if (IsZero(m.right().node())) {
return m.left().node();
}
}
return base::nullopt;
}
struct SimplifiedCondition {
Node* condition;
bool is_inverted;
};
// Tries to simplifies |cond| by removing all top-level "== 0". Everytime such a
// construction is removed, the meaning of the comparison is inverted. This is
// recorded by the variable |is_inverted| throughout this function, and returned
// at the end. If |is_inverted| is true at the end, the caller should invert the
// if/else branches following the comparison.
base::Optional<SimplifiedCondition> TrySimplifyCompareZero(Node* cond) {
bool is_inverted = false;
bool changed = false;
base::Optional<Node*> new_cond;
while ((new_cond = TryGetInvertedCondition(cond)).has_value()) {
cond = *new_cond;
is_inverted = !is_inverted;
changed = true;
}
if (changed) {
return SimplifiedCondition{cond, is_inverted};
} else {
return {};
}
}
} // namespace
void MachineOperatorReducer::SwapBranches(Node* node) {
DCHECK_EQ(node->opcode(), IrOpcode::kBranch);
for (Node* const use : node->uses()) {
switch (use->opcode()) {
case IrOpcode::kIfTrue:
NodeProperties::ChangeOp(use, common()->IfFalse());
break;
case IrOpcode::kIfFalse:
NodeProperties::ChangeOp(use, common()->IfTrue());
break;
default:
UNREACHABLE();
}
}
NodeProperties::ChangeOp(
node, common()->Branch(NegateBranchHint(BranchHintOf(node->op()))));
}
// If |node| is a branch, removes all top-level 32-bit "== 0" from |node|.
Reduction MachineOperatorReducer::SimplifyBranch(Node* node) {
Node* cond = node->InputAt(0);
if (auto simplified = TrySimplifyCompareZero(cond)) {
node->ReplaceInput(0, simplified->condition);
if (simplified->is_inverted) {
switch (node->opcode()) {
case IrOpcode::kBranch:
SwapBranches(node);
break;
case IrOpcode::kTrapIf:
NodeProperties::ChangeOp(node,
common()->TrapUnless(TrapIdOf(node->op())));
break;
case IrOpcode::kTrapUnless:
NodeProperties::ChangeOp(node,
common()->TrapIf(TrapIdOf(node->op())));
break;
case IrOpcode::kDeoptimizeIf: {
DeoptimizeParameters p = DeoptimizeParametersOf(node->op());
NodeProperties::ChangeOp(
node,
common()->DeoptimizeUnless(p.kind(), p.reason(), p.feedback()));
break;
}
case IrOpcode::kDeoptimizeUnless: {
DeoptimizeParameters p = DeoptimizeParametersOf(node->op());
NodeProperties::ChangeOp(
node, common()->DeoptimizeIf(p.kind(), p.reason(), p.feedback()));
break;
}
default:
UNREACHABLE();
}
}
return Changed(node);
}
return NoChange();
}
Reduction MachineOperatorReducer::ReduceConditional(Node* node) {
DCHECK(node->opcode() == IrOpcode::kBranch ||
node->opcode() == IrOpcode::kDeoptimizeIf ||
@ -2145,17 +2267,18 @@ Reduction MachineOperatorReducer::ReduceConditional(Node* node) {
// Reductions involving control flow happen elsewhere. Non-zero inputs are
// considered true in all conditional ops.
NodeMatcher condition(NodeProperties::GetValueInput(node, 0));
Reduction reduction = NoChange();
if (condition.IsTruncateInt64ToInt32()) {
if (auto replacement =
ReduceConditionalN<Word64Adapter>(condition.node())) {
NodeProperties::ReplaceValueInput(node, *replacement, 0);
return Changed(node);
reduction = Changed(node);
}
} else if (auto replacement = ReduceConditionalN<Word32Adapter>(node)) {
NodeProperties::ReplaceValueInput(node, *replacement, 0);
return Changed(node);
reduction = Changed(node);
}
return NoChange();
return reduction.FollowedBy(SimplifyBranch(node));
}
template <typename WordNAdapter>

View File

@ -131,6 +131,12 @@ class V8_EXPORT_PRIVATE MachineOperatorReducer final
template <typename WordNAdapter>
Reduction ReduceWordNXor(Node* node);
// Tries to simplify "if(x == 0)" by removing the "== 0" and inverting
// branches.
Reduction SimplifyBranch(Node* node);
// Helper for SimplifyBranch; swaps the if/else of a branch.
void SwapBranches(Node* node);
// Helper for ReduceConditional. Does not perform the actual reduction; just
// returns a new Node that could be used as the input to the condition.
template <typename WordNAdapter>

View File

@ -28,6 +28,7 @@
#include "src/compiler/backend/register-allocator-verifier.h"
#include "src/compiler/backend/register-allocator.h"
#include "src/compiler/basic-block-instrumentor.h"
#include "src/compiler/branch-condition-duplicator.h"
#include "src/compiler/branch-elimination.h"
#include "src/compiler/bytecode-graph-builder.h"
#include "src/compiler/checkpoint-elimination.h"
@ -1976,6 +1977,16 @@ struct DecompressionOptimizationPhase {
}
};
struct BranchConditionDuplicationPhase {
DECL_PIPELINE_PHASE_CONSTANTS(BranchConditionDuplication)
void Run(PipelineData* data, Zone* temp_zone) {
BranchConditionDuplicator compare_zero_branch_optimizer(temp_zone,
data->graph());
compare_zero_branch_optimizer.Reduce();
}
};
#if V8_ENABLE_WEBASSEMBLY
struct WasmOptimizationPhase {
DECL_PIPELINE_PHASE_CONSTANTS(WasmOptimization)
@ -2780,6 +2791,9 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) {
Run<DecompressionOptimizationPhase>();
RunPrintAndVerify(DecompressionOptimizationPhase::phase_name(), true);
Run<BranchConditionDuplicationPhase>();
RunPrintAndVerify(BranchConditionDuplicationPhase::phase_name(), true);
data->source_positions()->RemoveDecorator();
if (data->info()->trace_turbo_json()) {
data->node_origins()->RemoveDecorator();

View File

@ -322,6 +322,7 @@ class RuntimeCallTimer final {
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, AllocateGeneralRegisters) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, AssembleCode) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, AssignSpillSlots) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, BranchConditionDuplication) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, BuildLiveRangeBundles) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, BuildLiveRanges) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, BytecodeGraphBuilder) \