[turbofan] ARM64: Match 64 bit compare with zero and branch

This patch enables the following transformations in the instruction
selector:

| Before           | After                  |
|------------------+------------------------|
| and x3, x1, #0x1 | tb{,n}z w1, #0, #+0x78 |
| cmp x3, #0x0     |                        |
| b.{eq,ne} #+0x80 |                        |
|------------------+------------------------|
| cmp x0, #0x0     | cb{,n}z x0, #+0x48     |
| b.{eq,ne} #+0x4c |                        |

I have not seen these patterns being generated by turbofan; however, the
stubs hit these cases frequently. A particular reason is that we are
turning operations that check for a Smi into a single `tbz`.

As a consequence, the interpreter is affected thanks to inlining
turbofan stubs into its bytecode handlers. I have noticed the size of
the interpreter was reduced by 200 instructions.

BUG=

Review-Url: https://codereview.chromium.org/2022073002
Cr-Commit-Position: refs/heads/master@{#36632}
This commit is contained in:
pierre.langlois 2016-06-01 01:01:05 -07:00 committed by Commit bot
parent f2c0264a0a
commit 27bd1747b4
6 changed files with 225 additions and 31 deletions

View File

@ -111,7 +111,6 @@ T ReverseBits(T value) {
return result;
}
// CountTrailingZeros32(value) returns the number of zero bits preceding the
// least significant 1 bit in |value| if |value| is non-zero, otherwise it
// returns 32.
@ -147,6 +146,14 @@ inline unsigned CountTrailingZeros64(uint64_t value) {
#endif
}
// Overload set over CountTrailingZeros32/64: the width of the argument type
// selects which fixed-width routine performs the count.

// 32-bit overload; forwards to CountTrailingZeros32.
inline unsigned CountTrailingZeros(uint32_t value) {
  const unsigned trailing_zeros = CountTrailingZeros32(value);
  return trailing_zeros;
}

// 64-bit overload; forwards to CountTrailingZeros64.
inline unsigned CountTrailingZeros(uint64_t value) {
  const unsigned trailing_zeros = CountTrailingZeros64(value);
  return trailing_zeros;
}
// Returns true iff |value| is a power of 2.
inline bool IsPowerOfTwo32(uint32_t value) {

View File

@ -1038,6 +1038,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Pseudo instructions turned into tbz/tbnz in AssembleArchBranch.
break;
case kArm64CompareAndBranch32:
case kArm64CompareAndBranch:
// Pseudo instruction turned into cbz/cbnz in AssembleArchBranch.
break;
case kArm64ClaimCSP: {
@ -1503,6 +1504,17 @@ void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
default:
UNREACHABLE();
}
} else if (opcode == kArm64CompareAndBranch) {
switch (condition) {
case kEqual:
__ Cbz(i.InputRegister64(0), tlabel);
break;
case kNotEqual:
__ Cbnz(i.InputRegister64(0), tlabel);
break;
default:
UNREACHABLE();
}
} else if (opcode == kArm64TestAndBranch32) {
switch (condition) {
case kEqual:

View File

@ -78,6 +78,7 @@ namespace compiler {
V(Arm64TestAndBranch32) \
V(Arm64TestAndBranch) \
V(Arm64CompareAndBranch32) \
V(Arm64CompareAndBranch) \
V(Arm64ClaimCSP) \
V(Arm64ClaimJSSP) \
V(Arm64PokeCSP) \

View File

@ -136,6 +136,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64TestAndBranch32:
case kArm64TestAndBranch:
case kArm64CompareAndBranch32:
case kArm64CompareAndBranch:
return kIsBlockTerminator;
case kArm64LdrS:

View File

@ -1860,6 +1860,23 @@ void VisitWord64Test(InstructionSelector* selector, Node* node,
VisitWordTest(selector, node, kArm64Tst, cont);
}
// Tries to select a single test-bit-and-branch (tbz/tbnz) instruction for the
// binary operation |node|, viewed through |Matcher|.
//
// The match succeeds only when |cont| is a branch continuation and the right
// operand is a constant with exactly one bit set. In that case |kOpcode| is
// emitted with the left operand in a register, the index of the set bit as an
// immediate, and the continuation's true/false blocks as labels.
//
// Returns true if the instruction was emitted; returns false, emitting
// nothing, otherwise.
template <typename Matcher, ArchOpcode kOpcode>
bool TryEmitTestAndBranch(InstructionSelector* selector, Node* node,
                          FlagsContinuation* cont) {
  Arm64OperandGenerator g(selector);
  Matcher m(node);

  // Only branch continuations can be folded into tbz/tbnz.
  if (!cont->IsBranch()) return false;
  // The mask has to be a known constant...
  if (!m.right().HasValue()) return false;
  // ...with exactly one bit set.
  const auto mask = m.right().Value();
  if (base::bits::CountPopulation(mask) != 1) return false;

  DCHECK((cont->condition() == kEqual) || (cont->condition() == kNotEqual));

  // The position of the single set bit is the bit number that gets tested.
  const auto bit_index = base::bits::CountTrailingZeros(mask);
  selector->Emit(cont->Encode(kOpcode), g.NoOutput(),
                 g.UseRegister(m.left().node()), g.TempImmediate(bit_index),
                 g.Label(cont->true_block()), g.Label(cont->false_block()));
  return true;
}
// Shared routine for multiple float32 compare operations.
void VisitFloat32Compare(InstructionSelector* selector, Node* node,
@ -1904,6 +1921,8 @@ void VisitWordCompareZero(InstructionSelector* selector, Node* user,
while (selector->CanCover(user, value)) {
switch (value->opcode()) {
case IrOpcode::kWord32Equal: {
// Combine with comparisons against 0 by simply inverting the
// continuation.
Int32BinopMatcher m(value);
if (m.right().Is(0)) {
user = value;
@ -1926,10 +1945,33 @@ void VisitWordCompareZero(InstructionSelector* selector, Node* user,
case IrOpcode::kUint32LessThanOrEqual:
cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
return VisitWord32Compare(selector, value, cont);
case IrOpcode::kWord64Equal:
case IrOpcode::kWord64Equal: {
cont->OverwriteAndNegateIfEqual(kEqual);
Int64BinopMatcher m(value);
if (m.right().Is(0)) {
Node* const left = m.left().node();
if (selector->CanCover(value, left) &&
left->opcode() == IrOpcode::kWord64And) {
// Attempt to merge the Word64Equal(Word64And(x, y), 0) comparison
// into a tbz/tbnz instruction.
if (TryEmitTestAndBranch<Uint64BinopMatcher, kArm64TestAndBranch>(
selector, left, cont)) {
return;
}
return VisitWordCompare(selector, left, kArm64Tst, cont, true,
kLogical64Imm);
}
// Merge the Word64Equal(x, 0) comparison into a cbz instruction.
if (cont->IsBranch()) {
selector->Emit(cont->Encode(kArm64CompareAndBranch), g.NoOutput(),
g.UseRegister(left), g.Label(cont->true_block()),
g.Label(cont->false_block()));
return;
}
}
return VisitWordCompare(selector, value, kArm64Cmp, cont, false,
kArithmeticImm);
}
case IrOpcode::kInt64LessThan:
cont->OverwriteAndNegateIfEqual(kSignedLessThan);
return VisitWordCompare(selector, value, kArm64Cmp, cont, false,
@ -2004,42 +2046,20 @@ void VisitWordCompareZero(InstructionSelector* selector, Node* user,
kArithmeticImm);
case IrOpcode::kInt32Sub:
return VisitWord32Compare(selector, value, cont);
case IrOpcode::kWord32And: {
Int32BinopMatcher m(value);
if (cont->IsBranch() && m.right().HasValue() &&
(base::bits::CountPopulation32(m.right().Value()) == 1)) {
// If the mask has only one bit set, we can use tbz/tbnz.
DCHECK((cont->condition() == kEqual) ||
(cont->condition() == kNotEqual));
selector->Emit(
cont->Encode(kArm64TestAndBranch32), g.NoOutput(),
g.UseRegister(m.left().node()),
g.TempImmediate(
base::bits::CountTrailingZeros32(m.right().Value())),
g.Label(cont->true_block()), g.Label(cont->false_block()));
case IrOpcode::kWord32And:
if (TryEmitTestAndBranch<Uint32BinopMatcher, kArm64TestAndBranch32>(
selector, value, cont)) {
return;
}
return VisitWordCompare(selector, value, kArm64Tst32, cont, true,
kLogical32Imm);
}
case IrOpcode::kWord64And: {
Int64BinopMatcher m(value);
if (cont->IsBranch() && m.right().HasValue() &&
(base::bits::CountPopulation64(m.right().Value()) == 1)) {
// If the mask has only one bit set, we can use tbz/tbnz.
DCHECK((cont->condition() == kEqual) ||
(cont->condition() == kNotEqual));
selector->Emit(
cont->Encode(kArm64TestAndBranch), g.NoOutput(),
g.UseRegister(m.left().node()),
g.TempImmediate(
base::bits::CountTrailingZeros64(m.right().Value())),
g.Label(cont->true_block()), g.Label(cont->false_block()));
case IrOpcode::kWord64And:
if (TryEmitTestAndBranch<Uint64BinopMatcher, kArm64TestAndBranch>(
selector, value, cont)) {
return;
}
return VisitWordCompare(selector, value, kArm64Tst, cont, true,
kLogical64Imm);
}
default:
break;
}

View File

@ -1178,7 +1178,6 @@ TEST_F(InstructionSelectorTest, Word32AndBranchWithOneBitMaskOnRight) {
}
}
TEST_F(InstructionSelectorTest, Word32AndBranchWithOneBitMaskOnLeft) {
TRACED_FORRANGE(int, bit, 0, 31) {
uint32_t mask = 1 << bit;
@ -1261,6 +1260,91 @@ TEST_F(InstructionSelectorTest, Word64AndBranchWithOneBitMaskOnLeft) {
}
}
// Checks that branching on Word32Equal / Word32NotEqual of
// Word32And(mask, x) against zero, where |mask| has a single bit set, is
// selected as exactly one kArm64TestAndBranch32 instruction whose immediate
// input is the index of the tested bit.
TEST_F(InstructionSelectorTest, Word32EqualZeroAndBranchWithOneBitMask) {
  // (mask & x) == 0: the instruction must carry the kEqual condition.
  TRACED_FORRANGE(int, bit, 0, 31) {
    // Shift an unsigned one: with a signed literal, bit == 31 would shift
    // into the sign bit of int.
    uint32_t mask = 1u << bit;
    StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
    RawMachineLabel a, b;
    m.Branch(m.Word32Equal(m.Word32And(m.Int32Constant(mask), m.Parameter(0)),
                           m.Int32Constant(0)),
             &a, &b);
    m.Bind(&a);
    m.Return(m.Int32Constant(1));
    m.Bind(&b);
    m.Return(m.Int32Constant(0));
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArm64TestAndBranch32, s[0]->arch_opcode());
    EXPECT_EQ(kEqual, s[0]->flags_condition());
    EXPECT_EQ(4U, s[0]->InputCount());
    EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
    EXPECT_EQ(bit, s.ToInt32(s[0]->InputAt(1)));
  }

  // (mask & x) != 0: same instruction, but with the kNotEqual condition.
  TRACED_FORRANGE(int, bit, 0, 31) {
    // Unsigned shift for the same reason as above.
    uint32_t mask = 1u << bit;
    StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
    RawMachineLabel a, b;
    m.Branch(
        m.Word32NotEqual(m.Word32And(m.Int32Constant(mask), m.Parameter(0)),
                         m.Int32Constant(0)),
        &a, &b);
    m.Bind(&a);
    m.Return(m.Int32Constant(1));
    m.Bind(&b);
    m.Return(m.Int32Constant(0));
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArm64TestAndBranch32, s[0]->arch_opcode());
    EXPECT_EQ(kNotEqual, s[0]->flags_condition());
    EXPECT_EQ(4U, s[0]->InputCount());
    EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
    EXPECT_EQ(bit, s.ToInt32(s[0]->InputAt(1)));
  }
}
// 64-bit variant: a branch on Word64Equal / Word64NotEqual of
// Word64And(mask, x) against zero, with a single-bit |mask|, must be selected
// as exactly one kArm64TestAndBranch instruction whose immediate input is the
// index of the tested bit.
TEST_F(InstructionSelectorTest, Word64EqualZeroAndBranchWithOneBitMask) {
  // (mask & x) == 0 is expected to yield the kEqual condition.
  TRACED_FORRANGE(int, bit, 0, 63) {
    uint64_t mask = V8_UINT64_C(1) << bit;
    StreamBuilder m(this, MachineType::Int64(), MachineType::Int64());
    RawMachineLabel if_true, if_false;
    Node* const masked = m.Word64And(m.Int64Constant(mask), m.Parameter(0));
    Node* const is_zero = m.Word64Equal(masked, m.Int64Constant(0));
    m.Branch(is_zero, &if_true, &if_false);
    m.Bind(&if_true);
    m.Return(m.Int64Constant(1));
    m.Bind(&if_false);
    m.Return(m.Int64Constant(0));
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArm64TestAndBranch, s[0]->arch_opcode());
    EXPECT_EQ(kEqual, s[0]->flags_condition());
    EXPECT_EQ(4U, s[0]->InputCount());
    EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
    EXPECT_EQ(bit, s.ToInt64(s[0]->InputAt(1)));
  }

  // (mask & x) != 0 is expected to yield the kNotEqual condition.
  TRACED_FORRANGE(int, bit, 0, 63) {
    uint64_t mask = V8_UINT64_C(1) << bit;
    StreamBuilder m(this, MachineType::Int64(), MachineType::Int64());
    RawMachineLabel if_true, if_false;
    Node* const masked = m.Word64And(m.Int64Constant(mask), m.Parameter(0));
    Node* const is_nonzero = m.Word64NotEqual(masked, m.Int64Constant(0));
    m.Branch(is_nonzero, &if_true, &if_false);
    m.Bind(&if_true);
    m.Return(m.Int64Constant(1));
    m.Bind(&if_false);
    m.Return(m.Int64Constant(0));
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArm64TestAndBranch, s[0]->arch_opcode());
    EXPECT_EQ(kNotEqual, s[0]->flags_condition());
    EXPECT_EQ(4U, s[0]->InputCount());
    EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
    EXPECT_EQ(bit, s.ToInt64(s[0]->InputAt(1)));
  }
}
TEST_F(InstructionSelectorTest, CompareAgainstZeroAndBranch) {
{
@ -1298,6 +1382,75 @@ TEST_F(InstructionSelectorTest, CompareAgainstZeroAndBranch) {
}
}
// Checks that branching directly on an (in)equality of a parameter with zero
// is selected as a single compare-and-branch instruction:
// kArm64CompareAndBranch32 for 32-bit words and kArm64CompareAndBranch for
// 64-bit words, with the parameter as input 0 and the matching
// kEqual/kNotEqual condition.
TEST_F(InstructionSelectorTest, EqualZeroAndBranch) {
  // 32-bit, x == 0.
  {
    StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
    RawMachineLabel if_true, if_false;
    Node* param = m.Parameter(0);
    Node* const is_zero = m.Word32Equal(param, m.Int32Constant(0));
    m.Branch(is_zero, &if_true, &if_false);
    m.Bind(&if_true);
    m.Return(m.Int32Constant(1));
    m.Bind(&if_false);
    m.Return(m.Int32Constant(0));
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArm64CompareAndBranch32, s[0]->arch_opcode());
    EXPECT_EQ(kEqual, s[0]->flags_condition());
    EXPECT_EQ(3U, s[0]->InputCount());
    EXPECT_EQ(s.ToVreg(param), s.ToVreg(s[0]->InputAt(0)));
  }

  // 32-bit, x != 0.
  {
    StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
    RawMachineLabel if_true, if_false;
    Node* param = m.Parameter(0);
    Node* const is_nonzero = m.Word32NotEqual(param, m.Int32Constant(0));
    m.Branch(is_nonzero, &if_true, &if_false);
    m.Bind(&if_true);
    m.Return(m.Int32Constant(1));
    m.Bind(&if_false);
    m.Return(m.Int32Constant(0));
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArm64CompareAndBranch32, s[0]->arch_opcode());
    EXPECT_EQ(kNotEqual, s[0]->flags_condition());
    EXPECT_EQ(3U, s[0]->InputCount());
    EXPECT_EQ(s.ToVreg(param), s.ToVreg(s[0]->InputAt(0)));
  }

  // 64-bit, x == 0.
  {
    StreamBuilder m(this, MachineType::Int64(), MachineType::Int64());
    RawMachineLabel if_true, if_false;
    Node* param = m.Parameter(0);
    Node* const is_zero = m.Word64Equal(param, m.Int64Constant(0));
    m.Branch(is_zero, &if_true, &if_false);
    m.Bind(&if_true);
    m.Return(m.Int64Constant(1));
    m.Bind(&if_false);
    m.Return(m.Int64Constant(0));
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArm64CompareAndBranch, s[0]->arch_opcode());
    EXPECT_EQ(kEqual, s[0]->flags_condition());
    EXPECT_EQ(3U, s[0]->InputCount());
    EXPECT_EQ(s.ToVreg(param), s.ToVreg(s[0]->InputAt(0)));
  }

  // 64-bit, x != 0.
  {
    StreamBuilder m(this, MachineType::Int64(), MachineType::Int64());
    RawMachineLabel if_true, if_false;
    Node* param = m.Parameter(0);
    Node* const is_nonzero = m.Word64NotEqual(param, m.Int64Constant(0));
    m.Branch(is_nonzero, &if_true, &if_false);
    m.Bind(&if_true);
    m.Return(m.Int64Constant(1));
    m.Bind(&if_false);
    m.Return(m.Int64Constant(0));
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArm64CompareAndBranch, s[0]->arch_opcode());
    EXPECT_EQ(kNotEqual, s[0]->flags_condition());
    EXPECT_EQ(3U, s[0]->InputCount());
    EXPECT_EQ(s.ToVreg(param), s.ToVreg(s[0]->InputAt(0)));
  }
}
// -----------------------------------------------------------------------------
// Add and subtract instructions with overflow.