From 01b5a7ed67bd14752538a20b43c38a92731bbf4d Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 13 Sep 2019 12:08:41 +0100 Subject: [PATCH] [arm64] Fold comparisons into branches for Word64 Make TryEmitCbzOrTbz a template, so it can be used for Word64 as well as Word32. 0.09% reduction of embedded builtins size with a arm64 ptr-compr build. Some of the unittests weren't ported to Word64 as they don't pass, this is due to VisitWordCompare missing a loop to remove Word64Equal comparisons against 0. This can be added in a different CL if needed. Change-Id: I927129d934083b71abe5b77991c39286470a228d Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1792908 Commit-Queue: Martyn Capewell Reviewed-by: Michael Starzinger Cr-Commit-Position: refs/heads/master@{#63751} --- .../arm64/instruction-selector-arm64.cc | 141 ++++++++++------- .../instruction-selector-arm64-unittest.cc | 143 +++++++++++++++++- 2 files changed, 228 insertions(+), 56 deletions(-) diff --git a/src/compiler/backend/arm64/instruction-selector-arm64.cc b/src/compiler/backend/arm64/instruction-selector-arm64.cc index bdfa36f289..6282b0dc7b 100644 --- a/src/compiler/backend/arm64/instruction-selector-arm64.cc +++ b/src/compiler/backend/arm64/instruction-selector-arm64.cc @@ -1830,31 +1830,6 @@ void VisitCompare(InstructionSelector* selector, InstructionCode opcode, selector->EmitWithContinuation(opcode, left, right, cont); } -// Shared routine for multiple word compare operations. -void VisitWordCompare(InstructionSelector* selector, Node* node, - InstructionCode opcode, FlagsContinuation* cont, - ImmediateMode immediate_mode) { - Arm64OperandGenerator g(selector); - - Node* left = node->InputAt(0); - Node* right = node->InputAt(1); - - // If one of the two inputs is an immediate, make sure it's on the right. - if (!g.CanBeImmediate(right, immediate_mode) && - g.CanBeImmediate(left, immediate_mode)) { - cont->Commute(); - std::swap(left, right); - } - - if (g.CanBeImmediate(right, immediate_mode)) { - VisitCompare(selector, opcode, g.UseRegister(left), g.UseImmediate(right), - cont); - } else { - VisitCompare(selector, opcode, g.UseRegister(left), g.UseRegister(right), - cont); - } -} - // This function checks whether we can convert: // ((a b) cmp 0), b. // to: @@ -1986,9 +1961,35 @@ void EmitBranchOrDeoptimize(InstructionSelector* selector, selector->EmitWithContinuation(opcode, value, cont); } +template +struct CbzOrTbzMatchTrait {}; + +template <> +struct CbzOrTbzMatchTrait<32> { + using IntegralType = uint32_t; + using BinopMatcher = Int32BinopMatcher; + static constexpr IrOpcode::Value kAndOpcode = IrOpcode::kWord32And; + static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch32; + static constexpr ArchOpcode kCompareAndBranchOpcode = + kArm64CompareAndBranch32; + static constexpr unsigned kSignBit = kWSignBit; +}; + +template <> +struct CbzOrTbzMatchTrait<64> { + using IntegralType = uint64_t; + using BinopMatcher = Int64BinopMatcher; + static constexpr IrOpcode::Value kAndOpcode = IrOpcode::kWord64And; + static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch; + static constexpr ArchOpcode kCompareAndBranchOpcode = kArm64CompareAndBranch; + static constexpr unsigned kSignBit = kXSignBit; +}; + // Try to emit TBZ, TBNZ, CBZ or CBNZ for certain comparisons of {node} // against {value}, depending on the condition. -bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value, +template +bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, + typename CbzOrTbzMatchTrait::IntegralType value, Node* user, FlagsCondition cond, FlagsContinuation* cont) { // Branch poisoning requires flags to be set, so when it's enabled for // a particular branch, we shouldn't be applying the cbz/tbz optimization. @@ -2007,28 +2008,33 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value, if (cont->IsDeoptimize()) return false; Arm64OperandGenerator g(selector); cont->Overwrite(MapForTbz(cond)); - Int32Matcher m(node); - if (m.IsFloat64ExtractHighWord32() && selector->CanCover(user, node)) { - // SignedLessThan(Float64ExtractHighWord32(x), 0) and - // SignedGreaterThanOrEqual(Float64ExtractHighWord32(x), 0) essentially - // check the sign bit of a 64-bit floating point value. - InstructionOperand temp = g.TempRegister(); - selector->Emit(kArm64U64MoveFloat64, temp, - g.UseRegister(node->InputAt(0))); - selector->EmitWithContinuation(kArm64TestAndBranch, temp, - g.TempImmediate(63), cont); - return true; + + if (N == 32) { + Int32Matcher m(node); + if (m.IsFloat64ExtractHighWord32() && selector->CanCover(user, node)) { + // SignedLessThan(Float64ExtractHighWord32(x), 0) and + // SignedGreaterThanOrEqual(Float64ExtractHighWord32(x), 0) + // essentially check the sign bit of a 64-bit floating point value. + InstructionOperand temp = g.TempRegister(); + selector->Emit(kArm64U64MoveFloat64, temp, + g.UseRegister(node->InputAt(0))); + selector->EmitWithContinuation(kArm64TestAndBranch, temp, + g.TempImmediate(kDSignBit), cont); + return true; + } } - selector->EmitWithContinuation(kArm64TestAndBranch32, g.UseRegister(node), - g.TempImmediate(31), cont); + + selector->EmitWithContinuation( + CbzOrTbzMatchTrait::kTestAndBranchOpcode, g.UseRegister(node), + g.TempImmediate(CbzOrTbzMatchTrait::kSignBit), cont); return true; } case kEqual: case kNotEqual: { - if (node->opcode() == IrOpcode::kWord32And) { + if (node->opcode() == CbzOrTbzMatchTrait::kAndOpcode) { // Emit a tbz/tbnz if we are comparing with a single-bit mask: - // Branch(Word32Equal(Word32And(x, 1 << N), 1 << N), true, false) - Int32BinopMatcher m_and(node); + // Branch(WordEqual(WordAnd(x, 1 << N), 1 << N), true, false) + typename CbzOrTbzMatchTrait::BinopMatcher m_and(node); if (cont->IsBranch() && base::bits::IsPowerOfTwo(value) && m_and.right().Is(value) && selector->CanCover(user, node)) { Arm64OperandGenerator g(selector); @@ -2036,7 +2042,8 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value, // the opposite here so negate the condition. cont->Negate(); selector->EmitWithContinuation( - kArm64TestAndBranch32, g.UseRegister(m_and.left().node()), + CbzOrTbzMatchTrait::kTestAndBranchOpcode, + g.UseRegister(m_and.left().node()), g.TempImmediate(base::bits::CountTrailingZeros(value)), cont); return true; } @@ -2048,7 +2055,8 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value, if (value != 0) return false; Arm64OperandGenerator g(selector); cont->Overwrite(MapForCbz(cond)); - EmitBranchOrDeoptimize(selector, kArm64CompareAndBranch32, + EmitBranchOrDeoptimize(selector, + CbzOrTbzMatchTrait::kCompareAndBranchOpcode, g.UseRegister(node), cont); return true; } @@ -2057,20 +2065,50 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value, } } +// Shared routine for multiple word compare operations. +void VisitWordCompare(InstructionSelector* selector, Node* node, + InstructionCode opcode, FlagsContinuation* cont, + ImmediateMode immediate_mode) { + Arm64OperandGenerator g(selector); + + Node* left = node->InputAt(0); + Node* right = node->InputAt(1); + + // If one of the two inputs is an immediate, make sure it's on the right. + if (!g.CanBeImmediate(right, immediate_mode) && + g.CanBeImmediate(left, immediate_mode)) { + cont->Commute(); + std::swap(left, right); + } + + if (opcode == kArm64Cmp && !cont->IsPoisoned()) { + Int64Matcher m(right); + if (m.HasValue()) { + if (TryEmitCbzOrTbz<64>(selector, left, m.Value(), node, + cont->condition(), cont)) { + return; + } + } + } + + VisitCompare(selector, opcode, g.UseRegister(left), + g.UseOperand(right, immediate_mode), cont); +} + void VisitWord32Compare(InstructionSelector* selector, Node* node, FlagsContinuation* cont) { Int32BinopMatcher m(node); FlagsCondition cond = cont->condition(); if (!cont->IsPoisoned()) { if (m.right().HasValue()) { - if (TryEmitCbzOrTbz(selector, m.left().node(), m.right().Value(), node, - cond, cont)) { + if (TryEmitCbzOrTbz<32>(selector, m.left().node(), m.right().Value(), + node, cond, cont)) { return; } } else if (m.left().HasValue()) { FlagsCondition commuted_cond = CommuteFlagsCondition(cond); - if (TryEmitCbzOrTbz(selector, m.right().node(), m.left().Value(), node, - commuted_cond, cont)) { + if (TryEmitCbzOrTbz<32>(selector, m.right().node(), m.left().Value(), + node, commuted_cond, cont)) { return; } } @@ -2378,13 +2416,6 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, if (CanCover(value, left) && left->opcode() == IrOpcode::kWord64And) { return VisitWordCompare(this, left, kArm64Tst, cont, kLogical64Imm); } - // Merge the Word64Equal(x, 0) comparison into a cbz instruction. - if ((cont->IsBranch() || cont->IsDeoptimize()) && - !cont->IsPoisoned()) { - EmitBranchOrDeoptimize(this, kArm64CompareAndBranch, - g.UseRegister(left), cont); - return; - } } return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm); } diff --git a/test/unittests/compiler/arm64/instruction-selector-arm64-unittest.cc b/test/unittests/compiler/arm64/instruction-selector-arm64-unittest.cc index 373276a8b7..bdee748646 100644 --- a/test/unittests/compiler/arm64/instruction-selector-arm64-unittest.cc +++ b/test/unittests/compiler/arm64/instruction-selector-arm64-unittest.cc @@ -1153,7 +1153,7 @@ TEST_F(InstructionSelectorTest, AddBranchWithImmediateOnLeft) { struct TestAndBranch { MachInst> + uint64_t mask)>> mi; FlagsCondition cond; }; @@ -1272,6 +1272,92 @@ INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest, InstructionSelectorTestAndBranchTest, ::testing::ValuesIn(kTestAndBranchMatchers32)); +// TODO(arm64): Add the missing Word32BinaryNot test cases from the 32-bit +// version. +const TestAndBranch kTestAndBranchMatchers64[] = { + // Branch on the result of Word64And directly. + {{[](InstructionSelectorTest::StreamBuilder& m, Node* x, uint64_t mask) + -> Node* { return m.Word64And(x, m.Int64Constant(mask)); }, + "if (x and mask)", kArm64TestAndBranch, MachineType::Int64()}, + kNotEqual}, + {{[](InstructionSelectorTest::StreamBuilder& m, Node* x, + uint64_t mask) -> Node* { + return m.Word64Equal(m.Word64And(x, m.Int64Constant(mask)), + m.Int64Constant(0)); + }, + "if not (x and mask)", kArm64TestAndBranch, MachineType::Int64()}, + kEqual}, + {{[](InstructionSelectorTest::StreamBuilder& m, Node* x, uint64_t mask) + -> Node* { return m.Word64And(m.Int64Constant(mask), x); }, + "if (mask and x)", kArm64TestAndBranch, MachineType::Int64()}, + kNotEqual}, + {{[](InstructionSelectorTest::StreamBuilder& m, Node* x, + uint64_t mask) -> Node* { + return m.Word64Equal(m.Word64And(m.Int64Constant(mask), x), + m.Int64Constant(0)); + }, + "if not (mask and x)", kArm64TestAndBranch, MachineType::Int64()}, + kEqual}, + // Branch on the result of '(x and mask) == mask'. This tests that a bit is + // set rather than cleared which is why conditions are inverted. + {{[](InstructionSelectorTest::StreamBuilder& m, Node* x, + uint64_t mask) -> Node* { + return m.Word64Equal(m.Word64And(x, m.Int64Constant(mask)), + m.Int64Constant(mask)); + }, + "if ((x and mask) == mask)", kArm64TestAndBranch, MachineType::Int64()}, + kNotEqual}, + {{[](InstructionSelectorTest::StreamBuilder& m, Node* x, + uint64_t mask) -> Node* { + return m.Word64Equal(m.Int64Constant(mask), + m.Word64And(x, m.Int64Constant(mask))); + }, + "if (mask == (x and mask))", kArm64TestAndBranch, MachineType::Int64()}, + kNotEqual}, + // Same as above but swap 'mask' and 'x'. + {{[](InstructionSelectorTest::StreamBuilder& m, Node* x, + uint64_t mask) -> Node* { + return m.Word64Equal(m.Word64And(m.Int64Constant(mask), x), + m.Int64Constant(mask)); + }, + "if ((mask and x) == mask)", kArm64TestAndBranch, MachineType::Int64()}, + kNotEqual}, + {{[](InstructionSelectorTest::StreamBuilder& m, Node* x, + uint64_t mask) -> Node* { + return m.Word64Equal(m.Int64Constant(mask), + m.Word64And(m.Int64Constant(mask), x)); + }, + "if (mask == (mask and x))", kArm64TestAndBranch, MachineType::Int64()}, + kNotEqual}}; + +using InstructionSelectorTestAndBranchTest64 = + InstructionSelectorTestWithParam; + +TEST_P(InstructionSelectorTestAndBranchTest64, TestAndBranch64) { + const TestAndBranch inst = GetParam(); + TRACED_FORRANGE(int, bit, 0, 63) { + uint64_t mask = uint64_t{1} << bit; + StreamBuilder m(this, MachineType::Int64(), MachineType::Int64()); + RawMachineLabel a, b; + m.Branch(inst.mi.constructor(m, m.Parameter(0), mask), &a, &b); + m.Bind(&a); + m.Return(m.Int64Constant(1)); + m.Bind(&b); + m.Return(m.Int64Constant(0)); + Stream s = m.Build(); + ASSERT_EQ(1U, s.size()); + EXPECT_EQ(inst.mi.arch_opcode, s[0]->arch_opcode()); + EXPECT_EQ(inst.cond, s[0]->flags_condition()); + EXPECT_EQ(4U, s[0]->InputCount()); + EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind()); + EXPECT_EQ(bit, s.ToInt64(s[0]->InputAt(1))); + } +} + +INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest, + InstructionSelectorTestAndBranchTest64, + ::testing::ValuesIn(kTestAndBranchMatchers64)); + TEST_F(InstructionSelectorTest, Word64AndBranchWithOneBitMaskOnRight) { TRACED_FORRANGE(int, bit, 0, 63) { uint64_t mask = uint64_t{1} << bit; @@ -3506,6 +3592,33 @@ const IntegerCmp kBinopCmpZeroRightInstructions[] = { kNotEqual, kNotEqual}}; +const IntegerCmp kBinop64CmpZeroRightInstructions[] = { + {{&RawMachineAssembler::Word64Equal, "Word64Equal", kArm64Cmp, + MachineType::Int64()}, + kEqual, + kEqual}, + {{&RawMachineAssembler::Word64NotEqual, "Word64NotEqual", kArm64Cmp, + MachineType::Int64()}, + kNotEqual, + kNotEqual}, + {{&RawMachineAssembler::Int64LessThan, "Int64LessThan", kArm64Cmp, + MachineType::Int64()}, + kNegative, + kNegative}, + {{&RawMachineAssembler::Int64GreaterThanOrEqual, "Int64GreaterThanOrEqual", + kArm64Cmp, MachineType::Int64()}, + kPositiveOrZero, + kPositiveOrZero}, + {{&RawMachineAssembler::Uint64LessThanOrEqual, "Uint64LessThanOrEqual", + kArm64Cmp, MachineType::Int64()}, + kEqual, + kEqual}, + {{&RawMachineAssembler::Uint64GreaterThan, "Uint64GreaterThan", kArm64Cmp, + MachineType::Int64()}, + kNotEqual, + kNotEqual}, +}; + const IntegerCmp kBinopCmpZeroLeftInstructions[] = { {{&RawMachineAssembler::Word32Equal, "Word32Equal", kArm64Cmp32, MachineType::Int32()}, @@ -4530,6 +4643,34 @@ TEST_F(InstructionSelectorTest, CompareAgainstZero32) { } } +TEST_F(InstructionSelectorTest, CompareAgainstZero64) { + TRACED_FOREACH(IntegerCmp, cmp, kBinop64CmpZeroRightInstructions) { + StreamBuilder m(this, MachineType::Int64(), MachineType::Int64()); + Node* const param = m.Parameter(0); + RawMachineLabel a, b; + m.Branch((m.*cmp.mi.constructor)(param, m.Int64Constant(0)), &a, &b); + m.Bind(&a); + m.Return(m.Int64Constant(1)); + m.Bind(&b); + m.Return(m.Int64Constant(0)); + Stream s = m.Build(); + ASSERT_EQ(1U, s.size()); + EXPECT_EQ(s.ToVreg(param), s.ToVreg(s[0]->InputAt(0))); + if (cmp.cond == kNegative || cmp.cond == kPositiveOrZero) { + EXPECT_EQ(kArm64TestAndBranch, s[0]->arch_opcode()); + EXPECT_EQ(4U, s[0]->InputCount()); // The labels are also inputs. + EXPECT_EQ((cmp.cond == kNegative) ? kNotEqual : kEqual, + s[0]->flags_condition()); + EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind()); + EXPECT_EQ(63, s.ToInt32(s[0]->InputAt(1))); + } else { + EXPECT_EQ(kArm64CompareAndBranch, s[0]->arch_opcode()); + EXPECT_EQ(3U, s[0]->InputCount()); // The labels are also inputs. + EXPECT_EQ(cmp.cond, s[0]->flags_condition()); + } + } +} + TEST_F(InstructionSelectorTest, CompareFloat64HighLessThanZero64) { StreamBuilder m(this, MachineType::Int32(), MachineType::Float64()); Node* const param = m.Parameter(0);