[arm64][turbofan] Fold sign extensions and right shifts
Code from ARES-6 Basic:

    ldur w11, [x5, #15]
    asr w11, w11, #1
    sxtw x11, w11

With this CL:

    ldur w11, [x5, #15]
    sbfx x11, x11, #1, #31

This increases the performance of ARES-6 Basic by ~2% on Cortex-A53. It also reduces the snapshot by ~2000 instructions.

Change-Id: Ie9801da730f832337306422d2a9c63461d9e5690
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1849530
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Martyn Capewell <martyn.capewell@arm.com>
Cr-Commit-Position: refs/heads/master@{#64235}
This commit is contained in:
parent
57cdda9eb4
commit
9b6e45e179
@ -1228,6 +1228,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
case kArm64Sxtw:
|
||||
__ Sxtw(i.OutputRegister(), i.InputRegister32(0));
|
||||
break;
|
||||
case kArm64Sbfx:
|
||||
__ Sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
|
||||
i.InputInt6(2));
|
||||
break;
|
||||
case kArm64Sbfx32:
|
||||
__ Sbfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
|
||||
i.InputInt5(2));
|
||||
|
@ -70,6 +70,7 @@ namespace compiler {
|
||||
V(Arm64Sxtb) \
|
||||
V(Arm64Sxth) \
|
||||
V(Arm64Sxtw) \
|
||||
V(Arm64Sbfx) \
|
||||
V(Arm64Sbfx32) \
|
||||
V(Arm64Ubfx) \
|
||||
V(Arm64Ubfx32) \
|
||||
|
@ -71,6 +71,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kArm64Sxth:
|
||||
case kArm64Sxth32:
|
||||
case kArm64Sxtw:
|
||||
case kArm64Sbfx:
|
||||
case kArm64Sbfx32:
|
||||
case kArm64Ubfx:
|
||||
case kArm64Ubfx32:
|
||||
@ -446,6 +447,7 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
|
||||
|
||||
case kArm64Clz:
|
||||
case kArm64Clz32:
|
||||
case kArm64Sbfx:
|
||||
case kArm64Sbfx32:
|
||||
case kArm64Sxtb32:
|
||||
case kArm64Sxth32:
|
||||
|
@ -1598,9 +1598,22 @@ void InstructionSelector::VisitChangeInt32ToInt64(Node* node) {
|
||||
return;
|
||||
}
|
||||
EmitLoad(this, value, opcode, immediate_mode, rep, node);
|
||||
} else {
|
||||
VisitRR(this, kArm64Sxtw, node);
|
||||
return;
|
||||
}
|
||||
|
||||
if (value->opcode() == IrOpcode::kWord32Sar && CanCover(node, value)) {
|
||||
Int32BinopMatcher m(value);
|
||||
if (m.right().HasValue()) {
|
||||
Arm64OperandGenerator g(this);
|
||||
// Mask the shift amount, to keep the same semantics as Word32Sar.
|
||||
int right = m.right().Value() & 0x1F;
|
||||
Emit(kArm64Sbfx, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
|
||||
g.TempImmediate(right), g.TempImmediate(32 - right));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
VisitRR(this, kArm64Sxtw, node);
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitChangeUint32ToUint64(Node* node) {
|
||||
|
@ -2659,6 +2659,22 @@ TEST_F(InstructionSelectorTest, ChangeInt32ToInt64AfterLoad) {
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that ChangeInt32ToInt64 applied to a Word32Sar with a constant
// shift amount is selected as a single kArm64Sbfx instruction instead of a
// separate shift followed by a sign extension.
TEST_F(InstructionSelectorTest, ChangeInt32ToInt64WithWord32Sar) {
|
||||
// Test the mod 32 behaviour of Word32Sar by iterating up to 33.
// For the out-of-range amounts 32 and 33, (imm & 0x1f) is 0 and 1, so the
// expected immediates below wrap around rather than growing past 31.
// NOTE(review): assumes TRACED_FORRANGE includes the upper bound 33 - confirm
// against the macro definition.
|
||||
TRACED_FORRANGE(int32_t, imm, 0, 33) {
|
||||
// Graph under test: one Int32 parameter, Int64 result, returning
// ChangeInt32ToInt64(Word32Sar(param0, imm)).
StreamBuilder m(this, MachineType::Int64(), MachineType::Int32());
|
||||
m.Return(m.ChangeInt32ToInt64(
|
||||
m.Word32Sar(m.Parameter(0), m.Int32Constant(imm))));
|
||||
Stream s = m.Build();
|
||||
// Exactly one instruction must be emitted; ASSERT (not EXPECT) because the
// checks below index s[0].
ASSERT_EQ(1U, s.size());
|
||||
EXPECT_EQ(kArm64Sbfx, s[0]->arch_opcode());
|
||||
// Three inputs: the shifted value plus the two immediates checked below.
EXPECT_EQ(3U, s[0]->InputCount());
|
||||
EXPECT_EQ(1U, s[0]->OutputCount());
|
||||
// Input 1 is the shift amount masked to five bits.
EXPECT_EQ(imm & 0x1f, s.ToInt32(s[0]->InputAt(1)));
|
||||
// Input 2 is 32 minus the masked shift amount, i.e. the number of bits of
// the 32-bit source that remain after the shift.
EXPECT_EQ(32 - (imm & 0x1f), s.ToInt32(s[0]->InputAt(2)));
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Memory access instructions.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user