[ARM64] Optimize load followed by shift.
Instead of loading 64 bits and shifting:

  ldr x0, [x1, #offset]
  asr x0, x0, #32

directly load the interesting 32 bits and sign-extend:

  ldrsw x0, [x1, #offset+4]

BUG=
Review-Url: https://codereview.chromium.org/2243843002
Cr-Commit-Position: refs/heads/master@{#38622}
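To see why the rewrite is sound, here is a minimal standalone C++ sketch (not V8 code; the function names are made up for illustration) of the equivalence being exploited: on a little-endian target such as ARM64, arithmetically shifting a 64-bit load right by 32 produces the same value as directly loading and sign-extending the upper 4 bytes.

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  // Hypothetical model of: ldr x0, [x1]; asr x0, x0, #32.
  // (Right-shifting a negative int64_t is arithmetic on all mainstream
  // compilers, and guaranteed arithmetic since C++20.)
  int64_t ShiftOfLoad(const void* p) {
    int64_t v;
    std::memcpy(&v, p, sizeof(v));
    return v >> 32;
  }

  // Hypothetical model of: ldrsw x0, [x1, #4].
  // On little-endian ARM64 the upper 32 bits live at byte offset 4.
  int64_t ExtendingLoad(const void* p) {
    int32_t hi;
    std::memcpy(&hi, static_cast<const char*>(p) + 4, sizeof(hi));
    return static_cast<int64_t>(hi);  // sign-extends
  }

  int main() {
    const int64_t samples[] = {0, -1, 1LL << 40, INT64_MIN, INT64_MAX};
    for (int64_t s : samples) {
      assert(ShiftOfLoad(&s) == ExtendingLoad(&s));
    }
    return 0;
  }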
parent f03bebc77d
commit dc6b5109d7
@@ -161,6 +161,78 @@ void VisitRRO(InstructionSelector* selector, ArchOpcode opcode, Node* node,
                  g.UseOperand(node->InputAt(1), operand_mode));
}

struct ExtendingLoadMatcher {
  ExtendingLoadMatcher(Node* node, InstructionSelector* selector)
      : matches_(false), selector_(selector), base_(nullptr), immediate_(0) {
    Initialize(node);
  }

  bool Matches() const { return matches_; }

  Node* base() const {
    DCHECK(Matches());
    return base_;
  }
  int64_t immediate() const {
    DCHECK(Matches());
    return immediate_;
  }
  ArchOpcode opcode() const {
    DCHECK(Matches());
    return opcode_;
  }

 private:
  bool matches_;
  InstructionSelector* selector_;
  Node* base_;
  int64_t immediate_;
  ArchOpcode opcode_;

  void Initialize(Node* node) {
    Int64BinopMatcher m(node);
    // When loading a 64-bit value and shifting by 32, we should
    // just load and sign-extend the interesting 4 bytes instead.
    // This happens, for example, when we're loading and untagging SMIs.
    DCHECK(m.IsWord64Sar());
    if (m.left().IsLoad() && m.right().Is(32) &&
        selector_->CanCover(m.node(), m.left().node())) {
      Arm64OperandGenerator g(selector_);
      Node* load = m.left().node();
      Node* offset = load->InputAt(1);
      base_ = load->InputAt(0);
      opcode_ = kArm64Ldrsw;
      if (g.IsIntegerConstant(offset)) {
        immediate_ = g.GetIntegerConstantValue(offset) + 4;
        matches_ = g.CanBeImmediate(immediate_, kLoadStoreImm32);
      }
    }
  }
};

bool TryMatchExtendingLoad(InstructionSelector* selector, Node* node) {
  ExtendingLoadMatcher m(node, selector);
  return m.Matches();
}

bool TryEmitExtendingLoad(InstructionSelector* selector, Node* node) {
  ExtendingLoadMatcher m(node, selector);
  Arm64OperandGenerator g(selector);
  if (m.Matches()) {
    InstructionOperand inputs[2];
    inputs[0] = g.UseRegister(m.base());
    InstructionCode opcode =
        m.opcode() | AddressingModeField::encode(kMode_MRI);
    DCHECK(is_int32(m.immediate()));
    inputs[1] = g.TempImmediate(static_cast<int32_t>(m.immediate()));
    InstructionOperand outputs[] = {g.DefineAsRegister(node)};
    selector->Emit(opcode, arraysize(outputs), outputs, arraysize(inputs),
                   inputs);
    return true;
  }
  return false;
}

bool TryMatchAnyShift(InstructionSelector* selector, Node* node,
                      Node* input_node, InstructionCode* opcode, bool try_ror) {
  Arm64OperandGenerator g(selector);
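The CanBeImmediate(immediate_, kLoadStoreImm32) guard above is load-bearing: adding 4 to the original offset can push it out of encodable range. As a rough standalone approximation of the encoding rule (an assumption stated here for illustration, not V8's actual implementation), a 32-bit load/store offset on ARM64 is encodable either as a signed 9-bit unscaled byte offset or as an unsigned 12-bit offset scaled by the 4-byte access size:

  #include <cstdint>

  // Hedged sketch of the ARM64 encodability rule assumed above:
  // ldursw takes a signed 9-bit unscaled byte offset; ldrsw takes an
  // unsigned 12-bit offset scaled by 4 (so at most 4095 * 4 = 16380).
  bool CanBeLoadStoreImm32(int64_t offset) {
    const bool unscaled = offset >= -256 && offset <= 255;
    const bool scaled = offset >= 0 && offset <= 4095 * 4 && offset % 4 == 0;
    return unscaled || scaled;
  }

The immediates exercised by the unit test below (-256, -255, 255, 256, 260, ..., 16376, 16380) walk exactly this boundary.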
@@ -179,7 +251,10 @@ bool TryMatchAnyShift(InstructionSelector* selector, Node* node,
      *opcode |= AddressingModeField::encode(kMode_Operand2_R_LSR_I);
      return true;
    case IrOpcode::kWord32Sar:
      *opcode |= AddressingModeField::encode(kMode_Operand2_R_ASR_I);
      return true;
    case IrOpcode::kWord64Sar:
      if (TryMatchExtendingLoad(selector, input_node)) return false;
      *opcode |= AddressingModeField::encode(kMode_Operand2_R_ASR_I);
      return true;
    case IrOpcode::kWord32Ror:
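The early return added for kWord64Sar is what keeps the two optimizations from fighting: when the sar feeds another operation (the Int64Add in the test below), TryMatchAnyShift would otherwise fold the shift into that operation as an ASR operand modifier, keeping the full 64-bit load alive and defeating the ldrsw rewrite. Returning false leaves the shift node intact so that VisitWord64Sar can emit the extending load instead.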
@@ -1130,6 +1205,7 @@ void InstructionSelector::VisitWord32Sar(Node* node) {


void InstructionSelector::VisitWord64Sar(Node* node) {
  if (TryEmitExtendingLoad(this, node)) return;
  VisitRRO(this, kArm64Asr, node, kShift64Imm);
}
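One detail worth noting in the test that follows: the load is built at offset index - 4, so after the matcher's + 4 adjustment the emitted ldrsw immediate is (index - 4) + 4 == index, which is exactly what EXPECT_EQ(index, s.ToInt32(s[0]->InputAt(1))) asserts. ASSERT_EQ(2U, s.size()) then confirms that only the ldrsw and the final add survive instruction selection.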
@@ -4286,6 +4286,31 @@ TEST_F(InstructionSelectorTest, Float64Neg) {
  EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
}

TEST_F(InstructionSelectorTest, LoadAndShiftRight) {
  {
    int32_t immediates[] = {-256, -255, -3,   -2,   -1,    0,     1,
                            2,    3,    255,  256,  260,   4096,  4100,
                            8192, 8196, 3276, 3280, 16376, 16380};
    TRACED_FOREACH(int32_t, index, immediates) {
      StreamBuilder m(this, MachineType::Uint64(), MachineType::Pointer());
      Node* const load = m.Load(MachineType::Uint64(), m.Parameter(0),
                                m.Int32Constant(index - 4));
      Node* const sar = m.Word64Sar(load, m.Int32Constant(32));
      // Make sure we don't fold the shift into the following add:
      m.Return(m.Int64Add(sar, m.Parameter(0)));
      Stream s = m.Build();
      ASSERT_EQ(2U, s.size());
      EXPECT_EQ(kArm64Ldrsw, s[0]->arch_opcode());
      EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
      EXPECT_EQ(2U, s[0]->InputCount());
      EXPECT_EQ(s.ToVreg(m.Parameter(0)), s.ToVreg(s[0]->InputAt(0)));
      ASSERT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
      EXPECT_EQ(index, s.ToInt32(s[0]->InputAt(1)));
      ASSERT_EQ(1U, s[0]->OutputCount());
    }
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8