[ARM64] Optimize load followed by shift.

Instead of loading 64 bits and then arithmetic-shifting right by 32:

      ldr x0, [x1, #offset]
      asr x0, x0, #32

directly load only the upper 32 bits with a sign-extending load:

      ldrsw x0, [x1, #offset+4]

BUG=

Review-Url: https://codereview.chromium.org/2243843002
Cr-Commit-Position: refs/heads/master@{#38622}
This commit is contained in:
georgia.kouveli 2016-08-12 06:55:33 -07:00 committed by Commit bot
parent f03bebc77d
commit dc6b5109d7
2 changed files with 101 additions and 0 deletions

View File

@ -161,6 +161,78 @@ void VisitRRO(InstructionSelector* selector, ArchOpcode opcode, Node* node,
g.UseOperand(node->InputAt(1), operand_mode)); g.UseOperand(node->InputAt(1), operand_mode));
} }
struct ExtendingLoadMatcher {
ExtendingLoadMatcher(Node* node, InstructionSelector* selector)
: matches_(false), selector_(selector), base_(nullptr), immediate_(0) {
Initialize(node);
}
bool Matches() const { return matches_; }
Node* base() const {
DCHECK(Matches());
return base_;
}
int64_t immediate() const {
DCHECK(Matches());
return immediate_;
}
ArchOpcode opcode() const {
DCHECK(Matches());
return opcode_;
}
private:
bool matches_;
InstructionSelector* selector_;
Node* base_;
int64_t immediate_;
ArchOpcode opcode_;
void Initialize(Node* node) {
Int64BinopMatcher m(node);
// When loading a 64-bit value and shifting by 32, we should
// just load and sign-extend the interesting 4 bytes instead.
// This happens, for example, when we're loading and untagging SMIs.
DCHECK(m.IsWord64Sar());
if (m.left().IsLoad() && m.right().Is(32) &&
selector_->CanCover(m.node(), m.left().node())) {
Arm64OperandGenerator g(selector_);
Node* load = m.left().node();
Node* offset = load->InputAt(1);
base_ = load->InputAt(0);
opcode_ = kArm64Ldrsw;
if (g.IsIntegerConstant(offset)) {
immediate_ = g.GetIntegerConstantValue(offset) + 4;
matches_ = g.CanBeImmediate(immediate_, kLoadStoreImm32);
}
}
}
};
// Returns true if |node| matches the load-then-shift pattern recognized by
// ExtendingLoadMatcher. Nothing is emitted here.
bool TryMatchExtendingLoad(InstructionSelector* selector, Node* node) {
  return ExtendingLoadMatcher(node, selector).Matches();
}
// Emits a single extending load for |node| when ExtendingLoadMatcher accepts
// it. Returns true if an instruction was emitted, false if the caller has to
// select |node| some other way.
bool TryEmitExtendingLoad(InstructionSelector* selector, Node* node) {
  ExtendingLoadMatcher matcher(node, selector);
  Arm64OperandGenerator g(selector);
  if (!matcher.Matches()) return false;

  // The immediate is handed over as a 32-bit value below.
  DCHECK(is_int32(matcher.immediate()));

  // Base register plus immediate offset (kMode_MRI).
  InstructionOperand inputs[] = {
      g.UseRegister(matcher.base()),
      g.TempImmediate(static_cast<int32_t>(matcher.immediate()))};
  InstructionOperand outputs[] = {g.DefineAsRegister(node)};
  const InstructionCode opcode =
      matcher.opcode() | AddressingModeField::encode(kMode_MRI);

  selector->Emit(opcode, arraysize(outputs), outputs, arraysize(inputs),
                 inputs);
  return true;
}
bool TryMatchAnyShift(InstructionSelector* selector, Node* node, bool TryMatchAnyShift(InstructionSelector* selector, Node* node,
Node* input_node, InstructionCode* opcode, bool try_ror) { Node* input_node, InstructionCode* opcode, bool try_ror) {
Arm64OperandGenerator g(selector); Arm64OperandGenerator g(selector);
@ -179,7 +251,10 @@ bool TryMatchAnyShift(InstructionSelector* selector, Node* node,
*opcode |= AddressingModeField::encode(kMode_Operand2_R_LSR_I); *opcode |= AddressingModeField::encode(kMode_Operand2_R_LSR_I);
return true; return true;
case IrOpcode::kWord32Sar: case IrOpcode::kWord32Sar:
*opcode |= AddressingModeField::encode(kMode_Operand2_R_ASR_I);
return true;
case IrOpcode::kWord64Sar: case IrOpcode::kWord64Sar:
if (TryMatchExtendingLoad(selector, input_node)) return false;
*opcode |= AddressingModeField::encode(kMode_Operand2_R_ASR_I); *opcode |= AddressingModeField::encode(kMode_Operand2_R_ASR_I);
return true; return true;
case IrOpcode::kWord32Ror: case IrOpcode::kWord32Ror:
@ -1130,6 +1205,7 @@ void InstructionSelector::VisitWord32Sar(Node* node) {
void InstructionSelector::VisitWord64Sar(Node* node) { void InstructionSelector::VisitWord64Sar(Node* node) {
// First try to replace a 64-bit load shifted right by 32 with one extending
// load; if that emitted an instruction we are done. Otherwise fall back to
// the generic kArm64Asr selection below.
if (TryEmitExtendingLoad(this, node)) return;
VisitRRO(this, kArm64Asr, node, kShift64Imm); VisitRRO(this, kArm64Asr, node, kShift64Imm);
} }

View File

@ -4286,6 +4286,31 @@ TEST_F(InstructionSelectorTest, Float64Neg) {
EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output())); EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
} }
// A 64-bit load whose result is arithmetic-shifted right by 32 must be
// selected as one kArm64Ldrsw whose immediate is the load offset plus 4.
TEST_F(InstructionSelectorTest, LoadAndShiftRight) {
  // Immediates expected on the emitted Ldrsw; each load is built with the
  // listed value minus 4 as its offset.
  const int32_t expected_immediates[] = {
      -256, -255, -3,   -2,   -1,   0,    1,     2,    3,    255,
      256,  260,  4096, 4100, 8192, 8196, 3276,  3280, 16376, 16380};

  TRACED_FOREACH(int32_t, index, expected_immediates) {
    StreamBuilder m(this, MachineType::Uint64(), MachineType::Pointer());
    Node* const base = m.Parameter(0);
    Node* const load_offset = m.Int32Constant(index - 4);
    Node* const load = m.Load(MachineType::Uint64(), base, load_offset);
    Node* const sar = m.Word64Sar(load, m.Int32Constant(32));
    // Make sure we don't fold the shift into the following add:
    m.Return(m.Int64Add(sar, m.Parameter(0)));

    Stream s = m.Build();
    ASSERT_EQ(2U, s.size());

    // The first instruction is the extending load in base + immediate form.
    EXPECT_EQ(kArm64Ldrsw, s[0]->arch_opcode());
    EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
    EXPECT_EQ(2U, s[0]->InputCount());
    EXPECT_EQ(s.ToVreg(m.Parameter(0)), s.ToVreg(s[0]->InputAt(0)));
    ASSERT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
    EXPECT_EQ(index, s.ToInt32(s[0]->InputAt(1)));
    ASSERT_EQ(1U, s[0]->OutputCount());
  }
}
} // namespace compiler } // namespace compiler
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8