[arm64] Fold comparisons into branches for Word64
Make TryEmitCbzOrTbz a template, so it can be used for Word64 as well as Word32. 0.09% reduction of embedded builtins size with a arm64 ptr-compr build. Some of the unittests weren't ported to Word64 as they don't pass, this is due to VisitWordCompare missing a loop to remove Word64Equal comparisons against 0. This can be added in a different CL if needed. Change-Id: I927129d934083b71abe5b77991c39286470a228d Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1792908 Commit-Queue: Martyn Capewell <martyn.capewell@arm.com> Reviewed-by: Michael Starzinger <mstarzinger@chromium.org> Cr-Commit-Position: refs/heads/master@{#63751}
This commit is contained in:
parent
d287e4bc46
commit
01b5a7ed67
@ -1830,31 +1830,6 @@ void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
|
||||
selector->EmitWithContinuation(opcode, left, right, cont);
|
||||
}
|
||||
|
||||
// Shared routine for multiple word compare operations.
|
||||
void VisitWordCompare(InstructionSelector* selector, Node* node,
|
||||
InstructionCode opcode, FlagsContinuation* cont,
|
||||
ImmediateMode immediate_mode) {
|
||||
Arm64OperandGenerator g(selector);
|
||||
|
||||
Node* left = node->InputAt(0);
|
||||
Node* right = node->InputAt(1);
|
||||
|
||||
// If one of the two inputs is an immediate, make sure it's on the right.
|
||||
if (!g.CanBeImmediate(right, immediate_mode) &&
|
||||
g.CanBeImmediate(left, immediate_mode)) {
|
||||
cont->Commute();
|
||||
std::swap(left, right);
|
||||
}
|
||||
|
||||
if (g.CanBeImmediate(right, immediate_mode)) {
|
||||
VisitCompare(selector, opcode, g.UseRegister(left), g.UseImmediate(right),
|
||||
cont);
|
||||
} else {
|
||||
VisitCompare(selector, opcode, g.UseRegister(left), g.UseRegister(right),
|
||||
cont);
|
||||
}
|
||||
}
|
||||
|
||||
// This function checks whether we can convert:
|
||||
// ((a <op> b) cmp 0), b.<cond>
|
||||
// to:
|
||||
@ -1986,9 +1961,35 @@ void EmitBranchOrDeoptimize(InstructionSelector* selector,
|
||||
selector->EmitWithContinuation(opcode, value, cont);
|
||||
}
|
||||
|
||||
template <int N>
|
||||
struct CbzOrTbzMatchTrait {};
|
||||
|
||||
template <>
|
||||
struct CbzOrTbzMatchTrait<32> {
|
||||
using IntegralType = uint32_t;
|
||||
using BinopMatcher = Int32BinopMatcher;
|
||||
static constexpr IrOpcode::Value kAndOpcode = IrOpcode::kWord32And;
|
||||
static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch32;
|
||||
static constexpr ArchOpcode kCompareAndBranchOpcode =
|
||||
kArm64CompareAndBranch32;
|
||||
static constexpr unsigned kSignBit = kWSignBit;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct CbzOrTbzMatchTrait<64> {
|
||||
using IntegralType = uint64_t;
|
||||
using BinopMatcher = Int64BinopMatcher;
|
||||
static constexpr IrOpcode::Value kAndOpcode = IrOpcode::kWord64And;
|
||||
static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch;
|
||||
static constexpr ArchOpcode kCompareAndBranchOpcode = kArm64CompareAndBranch;
|
||||
static constexpr unsigned kSignBit = kXSignBit;
|
||||
};
|
||||
|
||||
// Try to emit TBZ, TBNZ, CBZ or CBNZ for certain comparisons of {node}
|
||||
// against {value}, depending on the condition.
|
||||
bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
|
||||
template <int N>
|
||||
bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node,
|
||||
typename CbzOrTbzMatchTrait<N>::IntegralType value,
|
||||
Node* user, FlagsCondition cond, FlagsContinuation* cont) {
|
||||
// Branch poisoning requires flags to be set, so when it's enabled for
|
||||
// a particular branch, we shouldn't be applying the cbz/tbz optimization.
|
||||
@ -2007,28 +2008,33 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
|
||||
if (cont->IsDeoptimize()) return false;
|
||||
Arm64OperandGenerator g(selector);
|
||||
cont->Overwrite(MapForTbz(cond));
|
||||
|
||||
if (N == 32) {
|
||||
Int32Matcher m(node);
|
||||
if (m.IsFloat64ExtractHighWord32() && selector->CanCover(user, node)) {
|
||||
// SignedLessThan(Float64ExtractHighWord32(x), 0) and
|
||||
// SignedGreaterThanOrEqual(Float64ExtractHighWord32(x), 0) essentially
|
||||
// check the sign bit of a 64-bit floating point value.
|
||||
// SignedGreaterThanOrEqual(Float64ExtractHighWord32(x), 0)
|
||||
// essentially check the sign bit of a 64-bit floating point value.
|
||||
InstructionOperand temp = g.TempRegister();
|
||||
selector->Emit(kArm64U64MoveFloat64, temp,
|
||||
g.UseRegister(node->InputAt(0)));
|
||||
selector->EmitWithContinuation(kArm64TestAndBranch, temp,
|
||||
g.TempImmediate(63), cont);
|
||||
g.TempImmediate(kDSignBit), cont);
|
||||
return true;
|
||||
}
|
||||
selector->EmitWithContinuation(kArm64TestAndBranch32, g.UseRegister(node),
|
||||
g.TempImmediate(31), cont);
|
||||
}
|
||||
|
||||
selector->EmitWithContinuation(
|
||||
CbzOrTbzMatchTrait<N>::kTestAndBranchOpcode, g.UseRegister(node),
|
||||
g.TempImmediate(CbzOrTbzMatchTrait<N>::kSignBit), cont);
|
||||
return true;
|
||||
}
|
||||
case kEqual:
|
||||
case kNotEqual: {
|
||||
if (node->opcode() == IrOpcode::kWord32And) {
|
||||
if (node->opcode() == CbzOrTbzMatchTrait<N>::kAndOpcode) {
|
||||
// Emit a tbz/tbnz if we are comparing with a single-bit mask:
|
||||
// Branch(Word32Equal(Word32And(x, 1 << N), 1 << N), true, false)
|
||||
Int32BinopMatcher m_and(node);
|
||||
// Branch(WordEqual(WordAnd(x, 1 << N), 1 << N), true, false)
|
||||
typename CbzOrTbzMatchTrait<N>::BinopMatcher m_and(node);
|
||||
if (cont->IsBranch() && base::bits::IsPowerOfTwo(value) &&
|
||||
m_and.right().Is(value) && selector->CanCover(user, node)) {
|
||||
Arm64OperandGenerator g(selector);
|
||||
@ -2036,7 +2042,8 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
|
||||
// the opposite here so negate the condition.
|
||||
cont->Negate();
|
||||
selector->EmitWithContinuation(
|
||||
kArm64TestAndBranch32, g.UseRegister(m_and.left().node()),
|
||||
CbzOrTbzMatchTrait<N>::kTestAndBranchOpcode,
|
||||
g.UseRegister(m_and.left().node()),
|
||||
g.TempImmediate(base::bits::CountTrailingZeros(value)), cont);
|
||||
return true;
|
||||
}
|
||||
@ -2048,7 +2055,8 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
|
||||
if (value != 0) return false;
|
||||
Arm64OperandGenerator g(selector);
|
||||
cont->Overwrite(MapForCbz(cond));
|
||||
EmitBranchOrDeoptimize(selector, kArm64CompareAndBranch32,
|
||||
EmitBranchOrDeoptimize(selector,
|
||||
CbzOrTbzMatchTrait<N>::kCompareAndBranchOpcode,
|
||||
g.UseRegister(node), cont);
|
||||
return true;
|
||||
}
|
||||
@ -2057,20 +2065,50 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
|
||||
}
|
||||
}
|
||||
|
||||
// Shared routine for multiple word compare operations.
|
||||
void VisitWordCompare(InstructionSelector* selector, Node* node,
|
||||
InstructionCode opcode, FlagsContinuation* cont,
|
||||
ImmediateMode immediate_mode) {
|
||||
Arm64OperandGenerator g(selector);
|
||||
|
||||
Node* left = node->InputAt(0);
|
||||
Node* right = node->InputAt(1);
|
||||
|
||||
// If one of the two inputs is an immediate, make sure it's on the right.
|
||||
if (!g.CanBeImmediate(right, immediate_mode) &&
|
||||
g.CanBeImmediate(left, immediate_mode)) {
|
||||
cont->Commute();
|
||||
std::swap(left, right);
|
||||
}
|
||||
|
||||
if (opcode == kArm64Cmp && !cont->IsPoisoned()) {
|
||||
Int64Matcher m(right);
|
||||
if (m.HasValue()) {
|
||||
if (TryEmitCbzOrTbz<64>(selector, left, m.Value(), node,
|
||||
cont->condition(), cont)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VisitCompare(selector, opcode, g.UseRegister(left),
|
||||
g.UseOperand(right, immediate_mode), cont);
|
||||
}
|
||||
|
||||
void VisitWord32Compare(InstructionSelector* selector, Node* node,
|
||||
FlagsContinuation* cont) {
|
||||
Int32BinopMatcher m(node);
|
||||
FlagsCondition cond = cont->condition();
|
||||
if (!cont->IsPoisoned()) {
|
||||
if (m.right().HasValue()) {
|
||||
if (TryEmitCbzOrTbz(selector, m.left().node(), m.right().Value(), node,
|
||||
cond, cont)) {
|
||||
if (TryEmitCbzOrTbz<32>(selector, m.left().node(), m.right().Value(),
|
||||
node, cond, cont)) {
|
||||
return;
|
||||
}
|
||||
} else if (m.left().HasValue()) {
|
||||
FlagsCondition commuted_cond = CommuteFlagsCondition(cond);
|
||||
if (TryEmitCbzOrTbz(selector, m.right().node(), m.left().Value(), node,
|
||||
commuted_cond, cont)) {
|
||||
if (TryEmitCbzOrTbz<32>(selector, m.right().node(), m.left().Value(),
|
||||
node, commuted_cond, cont)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -2378,13 +2416,6 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
|
||||
if (CanCover(value, left) && left->opcode() == IrOpcode::kWord64And) {
|
||||
return VisitWordCompare(this, left, kArm64Tst, cont, kLogical64Imm);
|
||||
}
|
||||
// Merge the Word64Equal(x, 0) comparison into a cbz instruction.
|
||||
if ((cont->IsBranch() || cont->IsDeoptimize()) &&
|
||||
!cont->IsPoisoned()) {
|
||||
EmitBranchOrDeoptimize(this, kArm64CompareAndBranch,
|
||||
g.UseRegister(left), cont);
|
||||
return;
|
||||
}
|
||||
}
|
||||
return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm);
|
||||
}
|
||||
|
@ -1153,7 +1153,7 @@ TEST_F(InstructionSelectorTest, AddBranchWithImmediateOnLeft) {
|
||||
|
||||
struct TestAndBranch {
|
||||
MachInst<std::function<Node*(InstructionSelectorTest::StreamBuilder&, Node*,
|
||||
uint32_t mask)>>
|
||||
uint64_t mask)>>
|
||||
mi;
|
||||
FlagsCondition cond;
|
||||
};
|
||||
@ -1272,6 +1272,92 @@ INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
|
||||
InstructionSelectorTestAndBranchTest,
|
||||
::testing::ValuesIn(kTestAndBranchMatchers32));
|
||||
|
||||
// TODO(arm64): Add the missing Word32BinaryNot test cases from the 32-bit
|
||||
// version.
|
||||
const TestAndBranch kTestAndBranchMatchers64[] = {
|
||||
// Branch on the result of Word64And directly.
|
||||
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x, uint64_t mask)
|
||||
-> Node* { return m.Word64And(x, m.Int64Constant(mask)); },
|
||||
"if (x and mask)", kArm64TestAndBranch, MachineType::Int64()},
|
||||
kNotEqual},
|
||||
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
|
||||
uint64_t mask) -> Node* {
|
||||
return m.Word64Equal(m.Word64And(x, m.Int64Constant(mask)),
|
||||
m.Int64Constant(0));
|
||||
},
|
||||
"if not (x and mask)", kArm64TestAndBranch, MachineType::Int64()},
|
||||
kEqual},
|
||||
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x, uint64_t mask)
|
||||
-> Node* { return m.Word64And(m.Int64Constant(mask), x); },
|
||||
"if (mask and x)", kArm64TestAndBranch, MachineType::Int64()},
|
||||
kNotEqual},
|
||||
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
|
||||
uint64_t mask) -> Node* {
|
||||
return m.Word64Equal(m.Word64And(m.Int64Constant(mask), x),
|
||||
m.Int64Constant(0));
|
||||
},
|
||||
"if not (mask and x)", kArm64TestAndBranch, MachineType::Int64()},
|
||||
kEqual},
|
||||
// Branch on the result of '(x and mask) == mask'. This tests that a bit is
|
||||
// set rather than cleared which is why conditions are inverted.
|
||||
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
|
||||
uint64_t mask) -> Node* {
|
||||
return m.Word64Equal(m.Word64And(x, m.Int64Constant(mask)),
|
||||
m.Int64Constant(mask));
|
||||
},
|
||||
"if ((x and mask) == mask)", kArm64TestAndBranch, MachineType::Int64()},
|
||||
kNotEqual},
|
||||
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
|
||||
uint64_t mask) -> Node* {
|
||||
return m.Word64Equal(m.Int64Constant(mask),
|
||||
m.Word64And(x, m.Int64Constant(mask)));
|
||||
},
|
||||
"if (mask == (x and mask))", kArm64TestAndBranch, MachineType::Int64()},
|
||||
kNotEqual},
|
||||
// Same as above but swap 'mask' and 'x'.
|
||||
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
|
||||
uint64_t mask) -> Node* {
|
||||
return m.Word64Equal(m.Word64And(m.Int64Constant(mask), x),
|
||||
m.Int64Constant(mask));
|
||||
},
|
||||
"if ((mask and x) == mask)", kArm64TestAndBranch, MachineType::Int64()},
|
||||
kNotEqual},
|
||||
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
|
||||
uint64_t mask) -> Node* {
|
||||
return m.Word64Equal(m.Int64Constant(mask),
|
||||
m.Word64And(m.Int64Constant(mask), x));
|
||||
},
|
||||
"if (mask == (mask and x))", kArm64TestAndBranch, MachineType::Int64()},
|
||||
kNotEqual}};
|
||||
|
||||
using InstructionSelectorTestAndBranchTest64 =
|
||||
InstructionSelectorTestWithParam<TestAndBranch>;
|
||||
|
||||
TEST_P(InstructionSelectorTestAndBranchTest64, TestAndBranch64) {
|
||||
const TestAndBranch inst = GetParam();
|
||||
TRACED_FORRANGE(int, bit, 0, 63) {
|
||||
uint64_t mask = uint64_t{1} << bit;
|
||||
StreamBuilder m(this, MachineType::Int64(), MachineType::Int64());
|
||||
RawMachineLabel a, b;
|
||||
m.Branch(inst.mi.constructor(m, m.Parameter(0), mask), &a, &b);
|
||||
m.Bind(&a);
|
||||
m.Return(m.Int64Constant(1));
|
||||
m.Bind(&b);
|
||||
m.Return(m.Int64Constant(0));
|
||||
Stream s = m.Build();
|
||||
ASSERT_EQ(1U, s.size());
|
||||
EXPECT_EQ(inst.mi.arch_opcode, s[0]->arch_opcode());
|
||||
EXPECT_EQ(inst.cond, s[0]->flags_condition());
|
||||
EXPECT_EQ(4U, s[0]->InputCount());
|
||||
EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
|
||||
EXPECT_EQ(bit, s.ToInt64(s[0]->InputAt(1)));
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
|
||||
InstructionSelectorTestAndBranchTest64,
|
||||
::testing::ValuesIn(kTestAndBranchMatchers64));
|
||||
|
||||
TEST_F(InstructionSelectorTest, Word64AndBranchWithOneBitMaskOnRight) {
|
||||
TRACED_FORRANGE(int, bit, 0, 63) {
|
||||
uint64_t mask = uint64_t{1} << bit;
|
||||
@ -3506,6 +3592,33 @@ const IntegerCmp kBinopCmpZeroRightInstructions[] = {
|
||||
kNotEqual,
|
||||
kNotEqual}};
|
||||
|
||||
const IntegerCmp kBinop64CmpZeroRightInstructions[] = {
|
||||
{{&RawMachineAssembler::Word64Equal, "Word64Equal", kArm64Cmp,
|
||||
MachineType::Int64()},
|
||||
kEqual,
|
||||
kEqual},
|
||||
{{&RawMachineAssembler::Word64NotEqual, "Word64NotEqual", kArm64Cmp,
|
||||
MachineType::Int64()},
|
||||
kNotEqual,
|
||||
kNotEqual},
|
||||
{{&RawMachineAssembler::Int64LessThan, "Int64LessThan", kArm64Cmp,
|
||||
MachineType::Int64()},
|
||||
kNegative,
|
||||
kNegative},
|
||||
{{&RawMachineAssembler::Int64GreaterThanOrEqual, "Int64GreaterThanOrEqual",
|
||||
kArm64Cmp, MachineType::Int64()},
|
||||
kPositiveOrZero,
|
||||
kPositiveOrZero},
|
||||
{{&RawMachineAssembler::Uint64LessThanOrEqual, "Uint64LessThanOrEqual",
|
||||
kArm64Cmp, MachineType::Int64()},
|
||||
kEqual,
|
||||
kEqual},
|
||||
{{&RawMachineAssembler::Uint64GreaterThan, "Uint64GreaterThan", kArm64Cmp,
|
||||
MachineType::Int64()},
|
||||
kNotEqual,
|
||||
kNotEqual},
|
||||
};
|
||||
|
||||
const IntegerCmp kBinopCmpZeroLeftInstructions[] = {
|
||||
{{&RawMachineAssembler::Word32Equal, "Word32Equal", kArm64Cmp32,
|
||||
MachineType::Int32()},
|
||||
@ -4530,6 +4643,34 @@ TEST_F(InstructionSelectorTest, CompareAgainstZero32) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(InstructionSelectorTest, CompareAgainstZero64) {
|
||||
TRACED_FOREACH(IntegerCmp, cmp, kBinop64CmpZeroRightInstructions) {
|
||||
StreamBuilder m(this, MachineType::Int64(), MachineType::Int64());
|
||||
Node* const param = m.Parameter(0);
|
||||
RawMachineLabel a, b;
|
||||
m.Branch((m.*cmp.mi.constructor)(param, m.Int64Constant(0)), &a, &b);
|
||||
m.Bind(&a);
|
||||
m.Return(m.Int64Constant(1));
|
||||
m.Bind(&b);
|
||||
m.Return(m.Int64Constant(0));
|
||||
Stream s = m.Build();
|
||||
ASSERT_EQ(1U, s.size());
|
||||
EXPECT_EQ(s.ToVreg(param), s.ToVreg(s[0]->InputAt(0)));
|
||||
if (cmp.cond == kNegative || cmp.cond == kPositiveOrZero) {
|
||||
EXPECT_EQ(kArm64TestAndBranch, s[0]->arch_opcode());
|
||||
EXPECT_EQ(4U, s[0]->InputCount()); // The labels are also inputs.
|
||||
EXPECT_EQ((cmp.cond == kNegative) ? kNotEqual : kEqual,
|
||||
s[0]->flags_condition());
|
||||
EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
|
||||
EXPECT_EQ(63, s.ToInt32(s[0]->InputAt(1)));
|
||||
} else {
|
||||
EXPECT_EQ(kArm64CompareAndBranch, s[0]->arch_opcode());
|
||||
EXPECT_EQ(3U, s[0]->InputCount()); // The labels are also inputs.
|
||||
EXPECT_EQ(cmp.cond, s[0]->flags_condition());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(InstructionSelectorTest, CompareFloat64HighLessThanZero64) {
|
||||
StreamBuilder m(this, MachineType::Int32(), MachineType::Float64());
|
||||
Node* const param = m.Parameter(0);
|
||||
|
Loading…
Reference in New Issue
Block a user