From 9b6e45e17907e06dee5a55c36d3ad421c9260ac1 Mon Sep 17 00:00:00 2001
From: Joey Gouly
Date: Wed, 9 Oct 2019 17:09:26 +0100
Subject: [PATCH] [arm64][turbofan] Fold sign extensions and right shifts

Code from ARES-6 Basic:

  ldur w11, [x5, #15]
  asr w11, w11, #1
  sxtw x11, w11

With this CL:

  ldur w11, [x5, #15]
  sbfx x11, x11, #1, #31

This increases performance of Ares6 Basic by ~2% on Cortex-A53.
Also reduces the snapshot by ~2000 instructions.

Change-Id: Ie9801da730f832337306422d2a9c63461d9e5690
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1849530
Reviewed-by: Michael Starzinger
Commit-Queue: Martyn Capewell
Cr-Commit-Position: refs/heads/master@{#64235}
---
 .../backend/arm64/code-generator-arm64.cc       |  4 ++++
 .../backend/arm64/instruction-codes-arm64.h     |  1 +
 .../arm64/instruction-scheduler-arm64.cc        |  2 ++
 .../backend/arm64/instruction-selector-arm64.cc | 17 +++++++++++++++--
 .../instruction-selector-arm64-unittest.cc      | 16 ++++++++++++++++
 5 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/src/compiler/backend/arm64/code-generator-arm64.cc b/src/compiler/backend/arm64/code-generator-arm64.cc
index c43fa8fd20..666ee473ff 100644
--- a/src/compiler/backend/arm64/code-generator-arm64.cc
+++ b/src/compiler/backend/arm64/code-generator-arm64.cc
@@ -1228,6 +1228,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArm64Sxtw:
       __ Sxtw(i.OutputRegister(), i.InputRegister32(0));
       break;
+    case kArm64Sbfx:
+      __ Sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
+              i.InputInt6(2));
+      break;
     case kArm64Sbfx32:
       __ Sbfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
               i.InputInt5(2));
diff --git a/src/compiler/backend/arm64/instruction-codes-arm64.h b/src/compiler/backend/arm64/instruction-codes-arm64.h
index f379a02f2c..76fada41fa 100644
--- a/src/compiler/backend/arm64/instruction-codes-arm64.h
+++ b/src/compiler/backend/arm64/instruction-codes-arm64.h
@@ -70,6 +70,7 @@ namespace compiler {
   V(Arm64Sxtb) \
   V(Arm64Sxth) \
   V(Arm64Sxtw) \
+  V(Arm64Sbfx) \
   V(Arm64Sbfx32) \
   V(Arm64Ubfx) \
   V(Arm64Ubfx32) \
diff --git a/src/compiler/backend/arm64/instruction-scheduler-arm64.cc b/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
index 254efb7184..59d3f05d1e 100644
--- a/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
+++ b/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
@@ -71,6 +71,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArm64Sxth:
     case kArm64Sxth32:
     case kArm64Sxtw:
+    case kArm64Sbfx:
     case kArm64Sbfx32:
     case kArm64Ubfx:
     case kArm64Ubfx32:
@@ -446,6 +447,7 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
 
     case kArm64Clz:
     case kArm64Clz32:
+    case kArm64Sbfx:
     case kArm64Sbfx32:
     case kArm64Sxtb32:
     case kArm64Sxth32:
diff --git a/src/compiler/backend/arm64/instruction-selector-arm64.cc b/src/compiler/backend/arm64/instruction-selector-arm64.cc
index 024744b1e0..f71a19716b 100644
--- a/src/compiler/backend/arm64/instruction-selector-arm64.cc
+++ b/src/compiler/backend/arm64/instruction-selector-arm64.cc
@@ -1598,9 +1598,22 @@ void InstructionSelector::VisitChangeInt32ToInt64(Node* node) {
         return;
     }
     EmitLoad(this, value, opcode, immediate_mode, rep, node);
-  } else {
-    VisitRR(this, kArm64Sxtw, node);
+    return;
   }
+
+  if (value->opcode() == IrOpcode::kWord32Sar && CanCover(node, value)) {
+    Int32BinopMatcher m(value);
+    if (m.right().HasValue()) {
+      Arm64OperandGenerator g(this);
+      // Mask the shift amount, to keep the same semantics as Word32Sar.
+      int right = m.right().Value() & 0x1F;
+      Emit(kArm64Sbfx, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.TempImmediate(right), g.TempImmediate(32 - right));
+      return;
+    }
+  }
+
+  VisitRR(this, kArm64Sxtw, node);
 }
 
 void InstructionSelector::VisitChangeUint32ToUint64(Node* node) {
diff --git a/test/unittests/compiler/arm64/instruction-selector-arm64-unittest.cc b/test/unittests/compiler/arm64/instruction-selector-arm64-unittest.cc
index 76839288e9..d7a3a92c96 100644
--- a/test/unittests/compiler/arm64/instruction-selector-arm64-unittest.cc
+++ b/test/unittests/compiler/arm64/instruction-selector-arm64-unittest.cc
@@ -2659,6 +2659,22 @@ TEST_F(InstructionSelectorTest, ChangeInt32ToInt64AfterLoad) {
   }
 }
 
+TEST_F(InstructionSelectorTest, ChangeInt32ToInt64WithWord32Sar) {
+  // Test the mod 32 behaviour of Word32Sar by iterating up to 33.
+  TRACED_FORRANGE(int32_t, imm, 0, 33) {
+    StreamBuilder m(this, MachineType::Int64(), MachineType::Int32());
+    m.Return(m.ChangeInt32ToInt64(
+        m.Word32Sar(m.Parameter(0), m.Int32Constant(imm))));
+    Stream s = m.Build();
+    ASSERT_EQ(1U, s.size());
+    EXPECT_EQ(kArm64Sbfx, s[0]->arch_opcode());
+    EXPECT_EQ(3U, s[0]->InputCount());
+    EXPECT_EQ(1U, s[0]->OutputCount());
+    EXPECT_EQ(imm & 0x1f, s.ToInt32(s[0]->InputAt(1)));
+    EXPECT_EQ(32 - (imm & 0x1f), s.ToInt32(s[0]->InputAt(2)));
+  }
+}
+
 // -----------------------------------------------------------------------------
 // Memory access instructions.