[arm64] Fold comparisons into branches for Word64

Make TryEmitCbzOrTbz a template, so it can be used for Word64 as well as Word32.

0.09% reduction of embedded builtins size with an arm64 ptr-compr build.

Some of the unittests weren't ported to Word64 as they don't pass; this is due to
VisitWordCompare missing a loop to remove Word64Equal comparisons against 0. This
can be added in a different CL if needed.

Change-Id: I927129d934083b71abe5b77991c39286470a228d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1792908
Commit-Queue: Martyn Capewell <martyn.capewell@arm.com>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63751}
This commit is contained in:
Joey Gouly 2019-09-13 12:08:41 +01:00 committed by Commit Bot
parent d287e4bc46
commit 01b5a7ed67
2 changed files with 228 additions and 56 deletions

View File

@ -1830,31 +1830,6 @@ void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
selector->EmitWithContinuation(opcode, left, right, cont);
}
// Shared routine for multiple word compare operations.
void VisitWordCompare(InstructionSelector* selector, Node* node,
InstructionCode opcode, FlagsContinuation* cont,
ImmediateMode immediate_mode) {
Arm64OperandGenerator g(selector);
Node* left = node->InputAt(0);
Node* right = node->InputAt(1);
// If one of the two inputs is an immediate, make sure it's on the right.
if (!g.CanBeImmediate(right, immediate_mode) &&
g.CanBeImmediate(left, immediate_mode)) {
cont->Commute();
std::swap(left, right);
}
if (g.CanBeImmediate(right, immediate_mode)) {
VisitCompare(selector, opcode, g.UseRegister(left), g.UseImmediate(right),
cont);
} else {
VisitCompare(selector, opcode, g.UseRegister(left), g.UseRegister(right),
cont);
}
}
// This function checks whether we can convert:
// ((a <op> b) cmp 0), b.<cond>
// to:
@ -1986,9 +1961,35 @@ void EmitBranchOrDeoptimize(InstructionSelector* selector,
selector->EmitWithContinuation(opcode, value, cont);
}
template <int N>
struct CbzOrTbzMatchTrait {};
template <>
struct CbzOrTbzMatchTrait<32> {
using IntegralType = uint32_t;
using BinopMatcher = Int32BinopMatcher;
static constexpr IrOpcode::Value kAndOpcode = IrOpcode::kWord32And;
static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch32;
static constexpr ArchOpcode kCompareAndBranchOpcode =
kArm64CompareAndBranch32;
static constexpr unsigned kSignBit = kWSignBit;
};
template <>
struct CbzOrTbzMatchTrait<64> {
using IntegralType = uint64_t;
using BinopMatcher = Int64BinopMatcher;
static constexpr IrOpcode::Value kAndOpcode = IrOpcode::kWord64And;
static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch;
static constexpr ArchOpcode kCompareAndBranchOpcode = kArm64CompareAndBranch;
static constexpr unsigned kSignBit = kXSignBit;
};
// Try to emit TBZ, TBNZ, CBZ or CBNZ for certain comparisons of {node}
// against {value}, depending on the condition.
bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
template <int N>
bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node,
typename CbzOrTbzMatchTrait<N>::IntegralType value,
Node* user, FlagsCondition cond, FlagsContinuation* cont) {
// Branch poisoning requires flags to be set, so when it's enabled for
// a particular branch, we shouldn't be applying the cbz/tbz optimization.
@ -2007,28 +2008,33 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
if (cont->IsDeoptimize()) return false;
Arm64OperandGenerator g(selector);
cont->Overwrite(MapForTbz(cond));
if (N == 32) {
Int32Matcher m(node);
if (m.IsFloat64ExtractHighWord32() && selector->CanCover(user, node)) {
// SignedLessThan(Float64ExtractHighWord32(x), 0) and
// SignedGreaterThanOrEqual(Float64ExtractHighWord32(x), 0) essentially
// check the sign bit of a 64-bit floating point value.
// SignedGreaterThanOrEqual(Float64ExtractHighWord32(x), 0)
// essentially check the sign bit of a 64-bit floating point value.
InstructionOperand temp = g.TempRegister();
selector->Emit(kArm64U64MoveFloat64, temp,
g.UseRegister(node->InputAt(0)));
selector->EmitWithContinuation(kArm64TestAndBranch, temp,
g.TempImmediate(63), cont);
g.TempImmediate(kDSignBit), cont);
return true;
}
selector->EmitWithContinuation(kArm64TestAndBranch32, g.UseRegister(node),
g.TempImmediate(31), cont);
}
selector->EmitWithContinuation(
CbzOrTbzMatchTrait<N>::kTestAndBranchOpcode, g.UseRegister(node),
g.TempImmediate(CbzOrTbzMatchTrait<N>::kSignBit), cont);
return true;
}
case kEqual:
case kNotEqual: {
if (node->opcode() == IrOpcode::kWord32And) {
if (node->opcode() == CbzOrTbzMatchTrait<N>::kAndOpcode) {
// Emit a tbz/tbnz if we are comparing with a single-bit mask:
// Branch(Word32Equal(Word32And(x, 1 << N), 1 << N), true, false)
Int32BinopMatcher m_and(node);
// Branch(WordEqual(WordAnd(x, 1 << N), 1 << N), true, false)
typename CbzOrTbzMatchTrait<N>::BinopMatcher m_and(node);
if (cont->IsBranch() && base::bits::IsPowerOfTwo(value) &&
m_and.right().Is(value) && selector->CanCover(user, node)) {
Arm64OperandGenerator g(selector);
@ -2036,7 +2042,8 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
// the opposite here so negate the condition.
cont->Negate();
selector->EmitWithContinuation(
kArm64TestAndBranch32, g.UseRegister(m_and.left().node()),
CbzOrTbzMatchTrait<N>::kTestAndBranchOpcode,
g.UseRegister(m_and.left().node()),
g.TempImmediate(base::bits::CountTrailingZeros(value)), cont);
return true;
}
@ -2048,7 +2055,8 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
if (value != 0) return false;
Arm64OperandGenerator g(selector);
cont->Overwrite(MapForCbz(cond));
EmitBranchOrDeoptimize(selector, kArm64CompareAndBranch32,
EmitBranchOrDeoptimize(selector,
CbzOrTbzMatchTrait<N>::kCompareAndBranchOpcode,
g.UseRegister(node), cont);
return true;
}
@ -2057,20 +2065,50 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
}
}
// Shared routine for multiple word compare operations.
void VisitWordCompare(InstructionSelector* selector, Node* node,
InstructionCode opcode, FlagsContinuation* cont,
ImmediateMode immediate_mode) {
Arm64OperandGenerator g(selector);
Node* left = node->InputAt(0);
Node* right = node->InputAt(1);
// If one of the two inputs is an immediate, make sure it's on the right.
if (!g.CanBeImmediate(right, immediate_mode) &&
g.CanBeImmediate(left, immediate_mode)) {
cont->Commute();
std::swap(left, right);
}
if (opcode == kArm64Cmp && !cont->IsPoisoned()) {
Int64Matcher m(right);
if (m.HasValue()) {
if (TryEmitCbzOrTbz<64>(selector, left, m.Value(), node,
cont->condition(), cont)) {
return;
}
}
}
VisitCompare(selector, opcode, g.UseRegister(left),
g.UseOperand(right, immediate_mode), cont);
}
void VisitWord32Compare(InstructionSelector* selector, Node* node,
FlagsContinuation* cont) {
Int32BinopMatcher m(node);
FlagsCondition cond = cont->condition();
if (!cont->IsPoisoned()) {
if (m.right().HasValue()) {
if (TryEmitCbzOrTbz(selector, m.left().node(), m.right().Value(), node,
cond, cont)) {
if (TryEmitCbzOrTbz<32>(selector, m.left().node(), m.right().Value(),
node, cond, cont)) {
return;
}
} else if (m.left().HasValue()) {
FlagsCondition commuted_cond = CommuteFlagsCondition(cond);
if (TryEmitCbzOrTbz(selector, m.right().node(), m.left().Value(), node,
commuted_cond, cont)) {
if (TryEmitCbzOrTbz<32>(selector, m.right().node(), m.left().Value(),
node, commuted_cond, cont)) {
return;
}
}
@ -2378,13 +2416,6 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
if (CanCover(value, left) && left->opcode() == IrOpcode::kWord64And) {
return VisitWordCompare(this, left, kArm64Tst, cont, kLogical64Imm);
}
// Merge the Word64Equal(x, 0) comparison into a cbz instruction.
if ((cont->IsBranch() || cont->IsDeoptimize()) &&
!cont->IsPoisoned()) {
EmitBranchOrDeoptimize(this, kArm64CompareAndBranch,
g.UseRegister(left), cont);
return;
}
}
return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm);
}

View File

@ -1153,7 +1153,7 @@ TEST_F(InstructionSelectorTest, AddBranchWithImmediateOnLeft) {
struct TestAndBranch {
MachInst<std::function<Node*(InstructionSelectorTest::StreamBuilder&, Node*,
uint32_t mask)>>
uint64_t mask)>>
mi;
FlagsCondition cond;
};
@ -1272,6 +1272,92 @@ INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
InstructionSelectorTestAndBranchTest,
::testing::ValuesIn(kTestAndBranchMatchers32));
// TODO(arm64): Add the missing Word32BinaryNot test cases from the 32-bit
// version.
const TestAndBranch kTestAndBranchMatchers64[] = {
// Branch on the result of Word64And directly.
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x, uint64_t mask)
-> Node* { return m.Word64And(x, m.Int64Constant(mask)); },
"if (x and mask)", kArm64TestAndBranch, MachineType::Int64()},
kNotEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint64_t mask) -> Node* {
return m.Word64Equal(m.Word64And(x, m.Int64Constant(mask)),
m.Int64Constant(0));
},
"if not (x and mask)", kArm64TestAndBranch, MachineType::Int64()},
kEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x, uint64_t mask)
-> Node* { return m.Word64And(m.Int64Constant(mask), x); },
"if (mask and x)", kArm64TestAndBranch, MachineType::Int64()},
kNotEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint64_t mask) -> Node* {
return m.Word64Equal(m.Word64And(m.Int64Constant(mask), x),
m.Int64Constant(0));
},
"if not (mask and x)", kArm64TestAndBranch, MachineType::Int64()},
kEqual},
// Branch on the result of '(x and mask) == mask'. This tests that a bit is
// set rather than cleared which is why conditions are inverted.
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint64_t mask) -> Node* {
return m.Word64Equal(m.Word64And(x, m.Int64Constant(mask)),
m.Int64Constant(mask));
},
"if ((x and mask) == mask)", kArm64TestAndBranch, MachineType::Int64()},
kNotEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint64_t mask) -> Node* {
return m.Word64Equal(m.Int64Constant(mask),
m.Word64And(x, m.Int64Constant(mask)));
},
"if (mask == (x and mask))", kArm64TestAndBranch, MachineType::Int64()},
kNotEqual},
// Same as above but swap 'mask' and 'x'.
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint64_t mask) -> Node* {
return m.Word64Equal(m.Word64And(m.Int64Constant(mask), x),
m.Int64Constant(mask));
},
"if ((mask and x) == mask)", kArm64TestAndBranch, MachineType::Int64()},
kNotEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint64_t mask) -> Node* {
return m.Word64Equal(m.Int64Constant(mask),
m.Word64And(m.Int64Constant(mask), x));
},
"if (mask == (mask and x))", kArm64TestAndBranch, MachineType::Int64()},
kNotEqual}};
using InstructionSelectorTestAndBranchTest64 =
InstructionSelectorTestWithParam<TestAndBranch>;
TEST_P(InstructionSelectorTestAndBranchTest64, TestAndBranch64) {
const TestAndBranch inst = GetParam();
TRACED_FORRANGE(int, bit, 0, 63) {
uint64_t mask = uint64_t{1} << bit;
StreamBuilder m(this, MachineType::Int64(), MachineType::Int64());
RawMachineLabel a, b;
m.Branch(inst.mi.constructor(m, m.Parameter(0), mask), &a, &b);
m.Bind(&a);
m.Return(m.Int64Constant(1));
m.Bind(&b);
m.Return(m.Int64Constant(0));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(inst.mi.arch_opcode, s[0]->arch_opcode());
EXPECT_EQ(inst.cond, s[0]->flags_condition());
EXPECT_EQ(4U, s[0]->InputCount());
EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
EXPECT_EQ(bit, s.ToInt64(s[0]->InputAt(1)));
}
}
INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
InstructionSelectorTestAndBranchTest64,
::testing::ValuesIn(kTestAndBranchMatchers64));
TEST_F(InstructionSelectorTest, Word64AndBranchWithOneBitMaskOnRight) {
TRACED_FORRANGE(int, bit, 0, 63) {
uint64_t mask = uint64_t{1} << bit;
@ -3506,6 +3592,33 @@ const IntegerCmp kBinopCmpZeroRightInstructions[] = {
kNotEqual,
kNotEqual}};
const IntegerCmp kBinop64CmpZeroRightInstructions[] = {
{{&RawMachineAssembler::Word64Equal, "Word64Equal", kArm64Cmp,
MachineType::Int64()},
kEqual,
kEqual},
{{&RawMachineAssembler::Word64NotEqual, "Word64NotEqual", kArm64Cmp,
MachineType::Int64()},
kNotEqual,
kNotEqual},
{{&RawMachineAssembler::Int64LessThan, "Int64LessThan", kArm64Cmp,
MachineType::Int64()},
kNegative,
kNegative},
{{&RawMachineAssembler::Int64GreaterThanOrEqual, "Int64GreaterThanOrEqual",
kArm64Cmp, MachineType::Int64()},
kPositiveOrZero,
kPositiveOrZero},
{{&RawMachineAssembler::Uint64LessThanOrEqual, "Uint64LessThanOrEqual",
kArm64Cmp, MachineType::Int64()},
kEqual,
kEqual},
{{&RawMachineAssembler::Uint64GreaterThan, "Uint64GreaterThan", kArm64Cmp,
MachineType::Int64()},
kNotEqual,
kNotEqual},
};
const IntegerCmp kBinopCmpZeroLeftInstructions[] = {
{{&RawMachineAssembler::Word32Equal, "Word32Equal", kArm64Cmp32,
MachineType::Int32()},
@ -4530,6 +4643,34 @@ TEST_F(InstructionSelectorTest, CompareAgainstZero32) {
}
}
TEST_F(InstructionSelectorTest, CompareAgainstZero64) {
TRACED_FOREACH(IntegerCmp, cmp, kBinop64CmpZeroRightInstructions) {
StreamBuilder m(this, MachineType::Int64(), MachineType::Int64());
Node* const param = m.Parameter(0);
RawMachineLabel a, b;
m.Branch((m.*cmp.mi.constructor)(param, m.Int64Constant(0)), &a, &b);
m.Bind(&a);
m.Return(m.Int64Constant(1));
m.Bind(&b);
m.Return(m.Int64Constant(0));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(s.ToVreg(param), s.ToVreg(s[0]->InputAt(0)));
if (cmp.cond == kNegative || cmp.cond == kPositiveOrZero) {
EXPECT_EQ(kArm64TestAndBranch, s[0]->arch_opcode());
EXPECT_EQ(4U, s[0]->InputCount()); // The labels are also inputs.
EXPECT_EQ((cmp.cond == kNegative) ? kNotEqual : kEqual,
s[0]->flags_condition());
EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
EXPECT_EQ(63, s.ToInt32(s[0]->InputAt(1)));
} else {
EXPECT_EQ(kArm64CompareAndBranch, s[0]->arch_opcode());
EXPECT_EQ(3U, s[0]->InputCount()); // The labels are also inputs.
EXPECT_EQ(cmp.cond, s[0]->flags_condition());
}
}
}
TEST_F(InstructionSelectorTest, CompareFloat64HighLessThanZero64) {
StreamBuilder m(this, MachineType::Int32(), MachineType::Float64());
Node* const param = m.Parameter(0);