[arm64][turbofan] FP simplification
FNMUL is an efficient arm64 instruction that can save one cycle by rewriting FNEG(FMUL(x, y)) to FNMUL(x, y) and FMUL(FNEG(x), y) to FNMUL(x, y).

Change-Id: If25d9de1253098b17033a9d8736ff6a1c06601f3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1572681
Commit-Queue: Georg Neis <neis@chromium.org>
Reviewed-by: Martyn Capewell <martyn.capewell@arm.com>
Reviewed-by: Georg Neis <neis@chromium.org>
Cr-Commit-Position: refs/heads/master@{#61230}
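For context (a minimal sketch, not part of the original commit message): FNMUL computes -(x * y) directly, so the rewrite replaces a two-instruction fmul/fneg sequence with one instruction. A hypothetical C++ example of the two shapes the selector now matches, with illustrative names:

// Hypothetical standalone example, not V8 code.
// Shape FNEG(FMUL(x, y)): with this change TurboFan can select a single
// "fnmul s0, s0, s1" instead of "fmul s0, s0, s1" followed by "fneg s0, s0".
float negated_product(float x, float y) { return -(x * y); }

// Mirrored shape FMUL(FNEG(x), y), which maps to the same fnmul pattern.
double negated_product(double x, double y) { return (-x) * y; }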
parent: 7d17fd465d
commit: 529ed9e992
AUTHORS
@@ -25,6 +25,7 @@ Home Jinni Inc. <*@homejinni.com>
IBM Inc. <*@*.ibm.com>
IBM Inc. <*@ibm.com>
Samsung <*@*.samsung.com>
Samsung <*@samsung.com>
Joyent, Inc <*@joyent.com>
RT-RK Computer Based System <*@rt-rk.com>
Amazon, Inc <*@amazon.com>
@@ -388,6 +388,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
  V(fmla, Fmla) \
  V(fmls, Fmls) \
  V(fmulx, Fmulx) \
  V(fnmul, Fnmul) \
  V(frecps, Frecps) \
  V(frsqrts, Frsqrts) \
  V(mla, Mla) \
@@ -1340,6 +1340,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    case kArm64Float32Sqrt:
      __ Fsqrt(i.OutputFloat32Register(), i.InputFloat32Register(0));
      break;
    case kArm64Float32Fnmul: {
      __ Fnmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
               i.InputFloat32Register(1));
      break;
    }
    case kArm64Float64Cmp:
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Fcmp(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
@@ -1405,6 +1410,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    case kArm64Float64Sqrt:
      __ Fsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArm64Float64Fnmul:
      __ Fnmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputDoubleRegister(1));
      break;
    case kArm64Float32ToFloat64:
      __ Fcvt(i.OutputDoubleRegister(), i.InputDoubleRegister(0).S());
      break;
@@ -95,6 +95,7 @@ namespace compiler {
  V(Arm64Float32Abs) \
  V(Arm64Float32Neg) \
  V(Arm64Float32Sqrt) \
  V(Arm64Float32Fnmul) \
  V(Arm64Float32RoundDown) \
  V(Arm64Float32Max) \
  V(Arm64Float32Min) \
@@ -109,6 +110,7 @@ namespace compiler {
  V(Arm64Float64Abs) \
  V(Arm64Float64Neg) \
  V(Arm64Float64Sqrt) \
  V(Arm64Float64Fnmul) \
  V(Arm64Float64RoundDown) \
  V(Arm64Float32RoundUp) \
  V(Arm64Float64RoundUp) \
@@ -88,6 +88,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kArm64Float32Abs:
    case kArm64Float32Neg:
    case kArm64Float32Sqrt:
    case kArm64Float32Fnmul:
    case kArm64Float32RoundDown:
    case kArm64Float32Max:
    case kArm64Float32Min:
@@ -101,6 +102,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kArm64Float64Abs:
    case kArm64Float64Neg:
    case kArm64Float64Sqrt:
    case kArm64Float64Fnmul:
    case kArm64Float64RoundDown:
    case kArm64Float64RoundTiesAway:
    case kArm64Float64RoundTruncate:
@@ -1238,8 +1238,6 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
  V(Float64RoundTiesAway, kArm64Float64RoundTiesAway) \
  V(Float32RoundTiesEven, kArm64Float32RoundTiesEven) \
  V(Float64RoundTiesEven, kArm64Float64RoundTiesEven) \
  V(Float32Neg, kArm64Float32Neg) \
  V(Float64Neg, kArm64Float64Neg) \
  V(Float64ExtractLowWord32, kArm64Float64ExtractLowWord32) \
  V(Float64ExtractHighWord32, kArm64Float64ExtractHighWord32) \
  V(Float64SilenceNaN, kArm64Float64SilenceNaN)
@@ -1257,8 +1255,6 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
  V(Float64Add, kArm64Float64Add) \
  V(Float32Sub, kArm64Float32Sub) \
  V(Float64Sub, kArm64Float64Sub) \
  V(Float32Mul, kArm64Float32Mul) \
  V(Float64Mul, kArm64Float64Mul) \
  V(Float32Div, kArm64Float32Div) \
  V(Float64Div, kArm64Float64Div) \
  V(Float32Max, kArm64Float32Max) \
@@ -2654,6 +2650,38 @@ void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) {
  VisitWordCompare(this, node, kArm64Cmp, &cont, false, kArithmeticImm);
}

void InstructionSelector::VisitFloat32Neg(Node* node) {
  Arm64OperandGenerator g(this);
  Node* in = node->InputAt(0);
  if (in->opcode() == IrOpcode::kFloat32Mul && CanCover(node, in)) {
    Float32BinopMatcher m(in);
    Emit(kArm64Float32Fnmul, g.DefineAsRegister(node),
         g.UseRegister(m.left().node()), g.UseRegister(m.right().node()));
    return;
  }
  VisitRR(this, kArm64Float32Neg, node);
}

void InstructionSelector::VisitFloat32Mul(Node* node) {
  Arm64OperandGenerator g(this);
  Float32BinopMatcher m(node);

  if (m.left().IsFloat32Neg() && CanCover(node, m.left().node())) {
    Emit(kArm64Float32Fnmul, g.DefineAsRegister(node),
         g.UseRegister(m.left().node()->InputAt(0)),
         g.UseRegister(m.right().node()));
    return;
  }

  if (m.right().IsFloat32Neg() && CanCover(node, m.right().node())) {
    Emit(kArm64Float32Fnmul, g.DefineAsRegister(node),
         g.UseRegister(m.right().node()->InputAt(0)),
         g.UseRegister(m.left().node()));
    return;
  }
  return VisitRRR(this, kArm64Float32Mul, node);
}

void InstructionSelector::VisitFloat32Equal(Node* node) {
  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
  VisitFloat32Compare(this, node, &cont);
@@ -2719,6 +2747,38 @@ void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) {
       g.UseRegister(left), g.UseRegister(right));
}

void InstructionSelector::VisitFloat64Neg(Node* node) {
  Arm64OperandGenerator g(this);
  Node* in = node->InputAt(0);
  if (in->opcode() == IrOpcode::kFloat64Mul && CanCover(node, in)) {
    Float64BinopMatcher m(in);
    Emit(kArm64Float64Fnmul, g.DefineAsRegister(node),
         g.UseRegister(m.left().node()), g.UseRegister(m.right().node()));
    return;
  }
  VisitRR(this, kArm64Float64Neg, node);
}

void InstructionSelector::VisitFloat64Mul(Node* node) {
  Arm64OperandGenerator g(this);
  Float64BinopMatcher m(node);

  if (m.left().IsFloat64Neg() && CanCover(node, m.left().node())) {
    Emit(kArm64Float64Fnmul, g.DefineAsRegister(node),
         g.UseRegister(m.left().node()->InputAt(0)),
         g.UseRegister(m.right().node()));
    return;
  }

  if (m.right().IsFloat64Neg() && CanCover(node, m.right().node())) {
    Emit(kArm64Float64Fnmul, g.DefineAsRegister(node),
         g.UseRegister(m.right().node()->InputAt(0)),
         g.UseRegister(m.left().node()));
    return;
  }
  return VisitRRR(this, kArm64Float64Mul, node);
}

void InstructionSelector::VisitWord32AtomicLoad(Node* node) {
  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
  ArchOpcode opcode = kArchNop;
@@ -3998,6 +3998,87 @@ TEST(RunFloat64MulP) {
  }
}

TEST(RunFloat32MulAndFloat32Neg) {
  BufferedRawMachineAssemblerTester<float> m(MachineType::Float32(),
                                             MachineType::Float32());
  m.Return(m.Float32Neg(m.Float32Mul(m.Parameter(0), m.Parameter(1))));

  FOR_FLOAT32_INPUTS(i) {
    FOR_FLOAT32_INPUTS(j) { CHECK_FLOAT_EQ(-(i * j), m.Call(i, j)); }
  }
}

TEST(RunFloat64MulAndFloat64Neg) {
  BufferedRawMachineAssemblerTester<double> m(MachineType::Float64(),
                                              MachineType::Float64());
  m.Return(m.Float64Neg(m.Float64Mul(m.Parameter(0), m.Parameter(1))));

  FOR_FLOAT64_INPUTS(i) {
    FOR_FLOAT64_INPUTS(j) { CHECK_DOUBLE_EQ(-(i * j), m.Call(i, j)); }
  }
}

TEST(RunFloat32NegAndFloat32Mul1) {
  BufferedRawMachineAssemblerTester<float> m(MachineType::Float32(),
                                             MachineType::Float32());
  m.Return(m.Float32Mul(m.Float32Neg(m.Parameter(0)), m.Parameter(1)));

  FOR_FLOAT32_INPUTS(i) {
    FOR_FLOAT32_INPUTS(j) { CHECK_FLOAT_EQ((-i * j), m.Call(i, j)); }
  }
}

TEST(RunFloat64NegAndFloat64Mul1) {
  BufferedRawMachineAssemblerTester<double> m(MachineType::Float64(),
                                              MachineType::Float64());
  m.Return(m.Float64Mul(m.Float64Neg(m.Parameter(0)), m.Parameter(1)));

  FOR_FLOAT64_INPUTS(i) {
    FOR_FLOAT64_INPUTS(j) { CHECK_DOUBLE_EQ((-i * j), m.Call(i, j)); }
  }
}

TEST(RunFloat32NegAndFloat32Mul2) {
  BufferedRawMachineAssemblerTester<float> m(MachineType::Float32(),
                                             MachineType::Float32());
  m.Return(m.Float32Mul(m.Parameter(0), m.Float32Neg(m.Parameter(1))));

  FOR_FLOAT32_INPUTS(i) {
    FOR_FLOAT32_INPUTS(j) { CHECK_FLOAT_EQ((i * -j), m.Call(i, j)); }
  }
}

TEST(RunFloat64NegAndFloat64Mul2) {
  BufferedRawMachineAssemblerTester<double> m(MachineType::Float64(),
                                              MachineType::Float64());
  m.Return(m.Float64Mul(m.Parameter(0), m.Float64Neg(m.Parameter(1))));

  FOR_FLOAT64_INPUTS(i) {
    FOR_FLOAT64_INPUTS(j) { CHECK_DOUBLE_EQ((i * -j), m.Call(i, j)); }
  }
}

TEST(RunFloat32NegAndFloat32Mul3) {
  BufferedRawMachineAssemblerTester<float> m(MachineType::Float32(),
                                             MachineType::Float32());
  m.Return(
      m.Float32Mul(m.Float32Neg(m.Parameter(0)), m.Float32Neg(m.Parameter(1))));

  FOR_FLOAT32_INPUTS(i) {
    FOR_FLOAT32_INPUTS(j) { CHECK_FLOAT_EQ((-i * -j), m.Call(i, j)); }
  }
}

TEST(RunFloat64NegAndFloat64Mul3) {
  BufferedRawMachineAssemblerTester<double> m(MachineType::Float64(),
                                              MachineType::Float64());
  m.Return(
      m.Float64Mul(m.Float64Neg(m.Parameter(0)), m.Float64Neg(m.Parameter(1))));

  FOR_FLOAT64_INPUTS(i) {
    FOR_FLOAT64_INPUTS(j) { CHECK_DOUBLE_EQ((-i * -j), m.Call(i, j)); }
  }
}

TEST(RunFloat64MulAndFloat64Add1) {
  BufferedRawMachineAssemblerTester<double> m(
@@ -4368,6 +4368,78 @@ TEST_F(InstructionSelectorTest, Float64Neg) {
  EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
}

TEST_F(InstructionSelectorTest, Float32NegWithMul) {
  StreamBuilder m(this, MachineType::Float32(), MachineType::Float32(),
                  MachineType::Float32());
  Node* const p0 = m.Parameter(0);
  Node* const p1 = m.Parameter(1);
  Node* const n1 = m.AddNode(m.machine()->Float32Mul(), p0, p1);
  Node* const n2 = m.AddNode(m.machine()->Float32Neg(), n1);
  m.Return(n2);
  Stream s = m.Build();
  ASSERT_EQ(1U, s.size());
  EXPECT_EQ(kArm64Float32Fnmul, s[0]->arch_opcode());
  ASSERT_EQ(2U, s[0]->InputCount());
  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
  EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
  ASSERT_EQ(1U, s[0]->OutputCount());
  EXPECT_EQ(s.ToVreg(n2), s.ToVreg(s[0]->Output()));
}

TEST_F(InstructionSelectorTest, Float64NegWithMul) {
  StreamBuilder m(this, MachineType::Float64(), MachineType::Float64(),
                  MachineType::Float64());
  Node* const p0 = m.Parameter(0);
  Node* const p1 = m.Parameter(1);
  Node* const n1 = m.AddNode(m.machine()->Float64Mul(), p0, p1);
  Node* const n2 = m.AddNode(m.machine()->Float64Neg(), n1);
  m.Return(n2);
  Stream s = m.Build();
  ASSERT_EQ(1U, s.size());
  EXPECT_EQ(kArm64Float64Fnmul, s[0]->arch_opcode());
  ASSERT_EQ(2U, s[0]->InputCount());
  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
  EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
  ASSERT_EQ(1U, s[0]->OutputCount());
  EXPECT_EQ(s.ToVreg(n2), s.ToVreg(s[0]->Output()));
}

TEST_F(InstructionSelectorTest, Float32MulWithNeg) {
  StreamBuilder m(this, MachineType::Float32(), MachineType::Float32(),
                  MachineType::Float32());
  Node* const p0 = m.Parameter(0);
  Node* const p1 = m.Parameter(1);
  Node* const n1 = m.AddNode(m.machine()->Float32Neg(), p0);
  Node* const n2 = m.AddNode(m.machine()->Float32Mul(), n1, p1);
  m.Return(n2);
  Stream s = m.Build();
  ASSERT_EQ(1U, s.size());
  EXPECT_EQ(kArm64Float32Fnmul, s[0]->arch_opcode());
  ASSERT_EQ(2U, s[0]->InputCount());
  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
  EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
  ASSERT_EQ(1U, s[0]->OutputCount());
  EXPECT_EQ(s.ToVreg(n2), s.ToVreg(s[0]->Output()));
}

TEST_F(InstructionSelectorTest, Float64MulWithNeg) {
  StreamBuilder m(this, MachineType::Float64(), MachineType::Float64(),
                  MachineType::Float64());
  Node* const p0 = m.Parameter(0);
  Node* const p1 = m.Parameter(1);
  Node* const n1 = m.AddNode(m.machine()->Float64Neg(), p0);
  Node* const n2 = m.AddNode(m.machine()->Float64Mul(), n1, p1);
  m.Return(n2);
  Stream s = m.Build();
  ASSERT_EQ(1U, s.size());
  EXPECT_EQ(kArm64Float64Fnmul, s[0]->arch_opcode());
  ASSERT_EQ(2U, s[0]->InputCount());
  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
  EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
  ASSERT_EQ(1U, s[0]->OutputCount());
  EXPECT_EQ(s.ToVreg(n2), s.ToVreg(s[0]->Output()));
}

TEST_F(InstructionSelectorTest, LoadAndShiftRight) {
  {
    int32_t immediates[] = {-256, -255, -3, -2, -1, 0, 1,