[arm64][turbofan] Fold sign extensions and right shifts

Code generated for ARES-6 Basic before this CL:

  ldur w11, [x5, #15]
  asr w11, w11, #1
  sxtw x11, w11

With this CL:

  ldur w11, [x5, #15]
  sbfx x11, x11, #1, #31

This improves the performance of ARES-6 Basic by ~2% on Cortex-A53.
It also reduces the snapshot by ~2000 instructions.

Change-Id: Ie9801da730f832337306422d2a9c63461d9e5690
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1849530
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Martyn Capewell <martyn.capewell@arm.com>
Cr-Commit-Position: refs/heads/master@{#64235}
This commit is contained in:
Joey Gouly 2019-10-09 17:09:26 +01:00 committed by Commit Bot
parent 57cdda9eb4
commit 9b6e45e179
5 changed files with 38 additions and 2 deletions

View File

@ -1228,6 +1228,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArm64Sxtw: case kArm64Sxtw:
__ Sxtw(i.OutputRegister(), i.InputRegister32(0)); __ Sxtw(i.OutputRegister(), i.InputRegister32(0));
break; break;
case kArm64Sbfx:
__ Sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
i.InputInt6(2));
break;
case kArm64Sbfx32: case kArm64Sbfx32:
__ Sbfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1), __ Sbfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
i.InputInt5(2)); i.InputInt5(2));

View File

@ -70,6 +70,7 @@ namespace compiler {
V(Arm64Sxtb) \ V(Arm64Sxtb) \
V(Arm64Sxth) \ V(Arm64Sxth) \
V(Arm64Sxtw) \ V(Arm64Sxtw) \
V(Arm64Sbfx) \
V(Arm64Sbfx32) \ V(Arm64Sbfx32) \
V(Arm64Ubfx) \ V(Arm64Ubfx) \
V(Arm64Ubfx32) \ V(Arm64Ubfx32) \

View File

@ -71,6 +71,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64Sxth: case kArm64Sxth:
case kArm64Sxth32: case kArm64Sxth32:
case kArm64Sxtw: case kArm64Sxtw:
case kArm64Sbfx:
case kArm64Sbfx32: case kArm64Sbfx32:
case kArm64Ubfx: case kArm64Ubfx:
case kArm64Ubfx32: case kArm64Ubfx32:
@ -446,6 +447,7 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
case kArm64Clz: case kArm64Clz:
case kArm64Clz32: case kArm64Clz32:
case kArm64Sbfx:
case kArm64Sbfx32: case kArm64Sbfx32:
case kArm64Sxtb32: case kArm64Sxtb32:
case kArm64Sxth32: case kArm64Sxth32:

View File

@ -1598,9 +1598,22 @@ void InstructionSelector::VisitChangeInt32ToInt64(Node* node) {
return; return;
} }
EmitLoad(this, value, opcode, immediate_mode, rep, node); EmitLoad(this, value, opcode, immediate_mode, rep, node);
} else { return;
VisitRR(this, kArm64Sxtw, node);
} }
if (value->opcode() == IrOpcode::kWord32Sar && CanCover(node, value)) {
Int32BinopMatcher m(value);
if (m.right().HasValue()) {
Arm64OperandGenerator g(this);
// Mask the shift amount, to keep the same semantics as Word32Sar.
int right = m.right().Value() & 0x1F;
Emit(kArm64Sbfx, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
g.TempImmediate(right), g.TempImmediate(32 - right));
return;
}
}
VisitRR(this, kArm64Sxtw, node);
} }
void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { void InstructionSelector::VisitChangeUint32ToUint64(Node* node) {

View File

@ -2659,6 +2659,22 @@ TEST_F(InstructionSelectorTest, ChangeInt32ToInt64AfterLoad) {
} }
} }
TEST_F(InstructionSelectorTest, ChangeInt32ToInt64WithWord32Sar) {
  // Word32Sar has mod 32 shift semantics, so the range runs up to 33 to cover
  // shift amounts that wrap around as well as the in-range ones.
  TRACED_FORRANGE(int32_t, imm, 0, 33) {
    // Expected Sbfx immediates: the masked shift amount, and the number of
    // bits that remain in the 32-bit value above that shift.
    const int32_t masked_shift = imm & 0x1f;
    const int32_t field_width = 32 - masked_shift;

    StreamBuilder m(this, MachineType::Int64(), MachineType::Int32());
    auto const shifted = m.Word32Sar(m.Parameter(0), m.Int32Constant(imm));
    m.Return(m.ChangeInt32ToInt64(shifted));
    Stream s = m.Build();

    // The shift and the sign extension must be selected as a single Sbfx
    // taking the value plus the two immediates.
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArm64Sbfx, s[0]->arch_opcode());
    EXPECT_EQ(3U, s[0]->InputCount());
    EXPECT_EQ(1U, s[0]->OutputCount());
    EXPECT_EQ(masked_shift, s.ToInt32(s[0]->InputAt(1)));
    EXPECT_EQ(field_width, s.ToInt32(s[0]->InputAt(2)));
  }
}
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Memory access instructions. // Memory access instructions.