[ia32] Merge SSE/AVX float32/float64 add sub mul div
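This removes 8 arch opcodes: each kSSEFloat{32,64}{Add,Sub,Mul,Div} / kAVXFloat{32,64}{Add,Sub,Mul,Div} pair is merged into a single kFloat{32,64}{Add,Sub,Mul,Div} opcode (16 opcodes removed, 8 added).

Bug: v8:11217
Change-Id: I2c7a73b032ba5fa21f9843ebb4325e226a22550a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3114590
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76442}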
This commit is contained in:
parent
360fdbdee5
commit
09413a884f
@ -165,6 +165,8 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
|
||||
// Keep this list sorted by required extension, then instruction name.
|
||||
AVX_OP(Addpd, addpd)
|
||||
AVX_OP(Addps, addps)
|
||||
AVX_OP(Addsd, addsd)
|
||||
AVX_OP(Addss, addss)
|
||||
AVX_OP(Andnpd, andnpd)
|
||||
AVX_OP(Andnps, andnps)
|
||||
AVX_OP(Andpd, andpd)
|
||||
@ -183,6 +185,8 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
|
||||
AVX_OP(Cvttps2dq, cvttps2dq)
|
||||
AVX_OP(Divpd, divpd)
|
||||
AVX_OP(Divps, divps)
|
||||
AVX_OP(Divsd, divsd)
|
||||
AVX_OP(Divss, divss)
|
||||
AVX_OP(Maxpd, maxpd)
|
||||
AVX_OP(Maxps, maxps)
|
||||
AVX_OP(Minpd, minpd)
|
||||
@ -200,6 +204,8 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
|
||||
AVX_OP(Movups, movups)
|
||||
AVX_OP(Mulpd, mulpd)
|
||||
AVX_OP(Mulps, mulps)
|
||||
AVX_OP(Mulsd, mulsd)
|
||||
AVX_OP(Mulss, mulss)
|
||||
AVX_OP(Orpd, orpd)
|
||||
AVX_OP(Orps, orps)
|
||||
AVX_OP(Packssdw, packssdw)
|
||||
@ -259,6 +265,8 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
|
||||
AVX_OP(Sqrtss, sqrtss)
|
||||
AVX_OP(Subpd, subpd)
|
||||
AVX_OP(Subps, subps)
|
||||
AVX_OP(Subsd, subsd)
|
||||
AVX_OP(Subss, subss)
|
||||
AVX_OP(Unpcklps, unpcklps)
|
||||
AVX_OP(Xorpd, xorpd)
|
||||
AVX_OP(Xorps, xorps)
|
||||
|
@ -1257,21 +1257,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
case kSSEFloat32Cmp:
|
||||
__ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
break;
|
||||
case kSSEFloat32Add:
|
||||
__ addss(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
break;
|
||||
case kSSEFloat32Sub:
|
||||
__ subss(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
break;
|
||||
case kSSEFloat32Mul:
|
||||
__ mulss(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
break;
|
||||
case kSSEFloat32Div:
|
||||
__ divss(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulss depending on the result.
|
||||
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
|
||||
break;
|
||||
case kSSEFloat32Sqrt:
|
||||
__ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
|
||||
break;
|
||||
@ -1301,21 +1286,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
case kSSEFloat64Cmp:
|
||||
__ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
break;
|
||||
case kSSEFloat64Add:
|
||||
__ addsd(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
break;
|
||||
case kSSEFloat64Sub:
|
||||
__ subsd(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
break;
|
||||
case kSSEFloat64Mul:
|
||||
__ mulsd(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
break;
|
||||
case kSSEFloat64Div:
|
||||
__ divsd(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulsd depending on the result.
|
||||
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
|
||||
break;
|
||||
case kSSEFloat32Max: {
|
||||
Label compare_swap, done_compare;
|
||||
if (instr->InputAt(1)->IsFPRegister()) {
|
||||
@ -1538,55 +1508,47 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
case kSSEFloat64LoadLowWord32:
|
||||
__ movd(i.OutputDoubleRegister(), i.InputOperand(0));
|
||||
break;
|
||||
case kAVXFloat32Add: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vaddss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
case kFloat32Add: {
|
||||
__ Addss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
break;
|
||||
}
|
||||
case kAVXFloat32Sub: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vsubss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
case kFloat32Sub: {
|
||||
__ Subss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
break;
|
||||
}
|
||||
case kAVXFloat32Mul: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vmulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
case kFloat32Mul: {
|
||||
__ Mulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
break;
|
||||
}
|
||||
case kAVXFloat32Div: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vdivss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
case kFloat32Div: {
|
||||
__ Divss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulss depending on the result.
|
||||
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
|
||||
break;
|
||||
}
|
||||
case kAVXFloat64Add: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vaddsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
case kFloat64Add: {
|
||||
__ Addsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
break;
|
||||
}
|
||||
case kAVXFloat64Sub: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vsubsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
case kFloat64Sub: {
|
||||
__ Subsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
break;
|
||||
}
|
||||
case kAVXFloat64Mul: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vmulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
case kFloat64Mul: {
|
||||
__ Mulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
break;
|
||||
}
|
||||
case kAVXFloat64Div: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vdivsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
case kFloat64Div: {
|
||||
__ Divsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulsd depending on the result.
|
||||
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
|
||||
|
@ -48,19 +48,11 @@ namespace compiler {
|
||||
V(IA32MFence) \
|
||||
V(IA32LFence) \
|
||||
V(SSEFloat32Cmp) \
|
||||
V(SSEFloat32Add) \
|
||||
V(SSEFloat32Sub) \
|
||||
V(SSEFloat32Mul) \
|
||||
V(SSEFloat32Div) \
|
||||
V(SSEFloat32Abs) \
|
||||
V(SSEFloat32Neg) \
|
||||
V(SSEFloat32Sqrt) \
|
||||
V(SSEFloat32Round) \
|
||||
V(SSEFloat64Cmp) \
|
||||
V(SSEFloat64Add) \
|
||||
V(SSEFloat64Sub) \
|
||||
V(SSEFloat64Mul) \
|
||||
V(SSEFloat64Div) \
|
||||
V(SSEFloat64Mod) \
|
||||
V(SSEFloat32Max) \
|
||||
V(SSEFloat64Max) \
|
||||
@ -86,14 +78,14 @@ namespace compiler {
|
||||
V(SSEFloat64InsertHighWord32) \
|
||||
V(SSEFloat64LoadLowWord32) \
|
||||
V(SSEFloat64SilenceNaN) \
|
||||
V(AVXFloat32Add) \
|
||||
V(AVXFloat32Sub) \
|
||||
V(AVXFloat32Mul) \
|
||||
V(AVXFloat32Div) \
|
||||
V(AVXFloat64Add) \
|
||||
V(AVXFloat64Sub) \
|
||||
V(AVXFloat64Mul) \
|
||||
V(AVXFloat64Div) \
|
||||
V(Float32Add) \
|
||||
V(Float32Sub) \
|
||||
V(Float64Add) \
|
||||
V(Float64Sub) \
|
||||
V(Float32Mul) \
|
||||
V(Float32Div) \
|
||||
V(Float64Mul) \
|
||||
V(Float64Div) \
|
||||
V(AVXFloat64Abs) \
|
||||
V(AVXFloat64Neg) \
|
||||
V(AVXFloat32Abs) \
|
||||
|
@ -49,19 +49,11 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32Bswap:
|
||||
case kIA32Lea:
|
||||
case kSSEFloat32Cmp:
|
||||
case kSSEFloat32Add:
|
||||
case kSSEFloat32Sub:
|
||||
case kSSEFloat32Mul:
|
||||
case kSSEFloat32Div:
|
||||
case kSSEFloat32Abs:
|
||||
case kSSEFloat32Neg:
|
||||
case kSSEFloat32Sqrt:
|
||||
case kSSEFloat32Round:
|
||||
case kSSEFloat64Cmp:
|
||||
case kSSEFloat64Add:
|
||||
case kSSEFloat64Sub:
|
||||
case kSSEFloat64Mul:
|
||||
case kSSEFloat64Div:
|
||||
case kSSEFloat64Mod:
|
||||
case kSSEFloat32Max:
|
||||
case kSSEFloat64Max:
|
||||
@ -87,14 +79,14 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kSSEFloat64InsertHighWord32:
|
||||
case kSSEFloat64LoadLowWord32:
|
||||
case kSSEFloat64SilenceNaN:
|
||||
case kAVXFloat32Add:
|
||||
case kAVXFloat32Sub:
|
||||
case kAVXFloat32Mul:
|
||||
case kAVXFloat32Div:
|
||||
case kAVXFloat64Add:
|
||||
case kAVXFloat64Sub:
|
||||
case kAVXFloat64Mul:
|
||||
case kAVXFloat64Div:
|
||||
case kFloat32Add:
|
||||
case kFloat32Sub:
|
||||
case kFloat64Add:
|
||||
case kFloat64Sub:
|
||||
case kFloat32Mul:
|
||||
case kFloat32Div:
|
||||
case kFloat64Mul:
|
||||
case kFloat64Div:
|
||||
case kAVXFloat64Abs:
|
||||
case kAVXFloat64Neg:
|
||||
case kAVXFloat32Abs:
|
||||
@ -448,7 +440,7 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
|
||||
// Basic latency modeling for ia32 instructions. They have been determined
|
||||
// in an empirical way.
|
||||
switch (instr->arch_opcode()) {
|
||||
case kSSEFloat64Mul:
|
||||
case kFloat64Mul:
|
||||
return 5;
|
||||
case kIA32Imul:
|
||||
case kIA32ImulHigh:
|
||||
@ -456,18 +448,18 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
|
||||
case kSSEFloat32Cmp:
|
||||
case kSSEFloat64Cmp:
|
||||
return 9;
|
||||
case kSSEFloat32Add:
|
||||
case kSSEFloat32Sub:
|
||||
case kFloat32Add:
|
||||
case kFloat32Sub:
|
||||
case kFloat64Add:
|
||||
case kFloat64Sub:
|
||||
case kSSEFloat32Abs:
|
||||
case kSSEFloat32Neg:
|
||||
case kSSEFloat64Add:
|
||||
case kSSEFloat64Sub:
|
||||
case kSSEFloat64Max:
|
||||
case kSSEFloat64Min:
|
||||
case kSSEFloat64Abs:
|
||||
case kSSEFloat64Neg:
|
||||
return 5;
|
||||
case kSSEFloat32Mul:
|
||||
case kFloat32Mul:
|
||||
return 4;
|
||||
case kSSEFloat32ToFloat64:
|
||||
case kSSEFloat64ToFloat32:
|
||||
@ -485,9 +477,9 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
|
||||
return 33;
|
||||
case kIA32Udiv:
|
||||
return 26;
|
||||
case kSSEFloat32Div:
|
||||
case kFloat32Div:
|
||||
return 35;
|
||||
case kSSEFloat64Div:
|
||||
case kFloat64Div:
|
||||
return 63;
|
||||
case kSSEFloat32Sqrt:
|
||||
case kSSEFloat64Sqrt:
|
||||
|
@ -315,14 +315,14 @@ void VisitRR(InstructionSelector* selector, Node* node,
|
||||
}
|
||||
|
||||
void VisitRROFloat(InstructionSelector* selector, Node* node,
|
||||
ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
|
||||
ArchOpcode opcode) {
|
||||
IA32OperandGenerator g(selector);
|
||||
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
|
||||
InstructionOperand operand1 = g.Use(node->InputAt(1));
|
||||
if (selector->IsSupported(AVX)) {
|
||||
selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, operand1);
|
||||
selector->Emit(opcode, g.DefineAsRegister(node), operand0, operand1);
|
||||
} else {
|
||||
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1);
|
||||
selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1175,23 +1175,23 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
|
||||
V(F64x2Trunc, kIA32F64x2Round | MiscField::encode(kRoundToZero)) \
|
||||
V(F64x2NearestInt, kIA32F64x2Round | MiscField::encode(kRoundToNearest))
|
||||
|
||||
#define RRO_FLOAT_OP_LIST(V) \
|
||||
V(Float32Add, kAVXFloat32Add, kSSEFloat32Add) \
|
||||
V(Float64Add, kAVXFloat64Add, kSSEFloat64Add) \
|
||||
V(Float32Sub, kAVXFloat32Sub, kSSEFloat32Sub) \
|
||||
V(Float64Sub, kAVXFloat64Sub, kSSEFloat64Sub) \
|
||||
V(Float32Mul, kAVXFloat32Mul, kSSEFloat32Mul) \
|
||||
V(Float64Mul, kAVXFloat64Mul, kSSEFloat64Mul) \
|
||||
V(Float32Div, kAVXFloat32Div, kSSEFloat32Div) \
|
||||
V(Float64Div, kAVXFloat64Div, kSSEFloat64Div) \
|
||||
V(F64x2Add, kIA32F64x2Add, kIA32F64x2Add) \
|
||||
V(F64x2Sub, kIA32F64x2Sub, kIA32F64x2Sub) \
|
||||
V(F64x2Mul, kIA32F64x2Mul, kIA32F64x2Mul) \
|
||||
V(F64x2Div, kIA32F64x2Div, kIA32F64x2Div) \
|
||||
V(F64x2Eq, kIA32F64x2Eq, kIA32F64x2Eq) \
|
||||
V(F64x2Ne, kIA32F64x2Ne, kIA32F64x2Ne) \
|
||||
V(F64x2Lt, kIA32F64x2Lt, kIA32F64x2Lt) \
|
||||
V(F64x2Le, kIA32F64x2Le, kIA32F64x2Le)
|
||||
#define RRO_FLOAT_OP_LIST(V) \
|
||||
V(Float32Add, kFloat32Add) \
|
||||
V(Float64Add, kFloat64Add) \
|
||||
V(Float32Sub, kFloat32Sub) \
|
||||
V(Float64Sub, kFloat64Sub) \
|
||||
V(Float32Mul, kFloat32Mul) \
|
||||
V(Float64Mul, kFloat64Mul) \
|
||||
V(Float32Div, kFloat32Div) \
|
||||
V(Float64Div, kFloat64Div) \
|
||||
V(F64x2Add, kIA32F64x2Add) \
|
||||
V(F64x2Sub, kIA32F64x2Sub) \
|
||||
V(F64x2Mul, kIA32F64x2Mul) \
|
||||
V(F64x2Div, kIA32F64x2Div) \
|
||||
V(F64x2Eq, kIA32F64x2Eq) \
|
||||
V(F64x2Ne, kIA32F64x2Ne) \
|
||||
V(F64x2Lt, kIA32F64x2Lt) \
|
||||
V(F64x2Le, kIA32F64x2Le)
|
||||
|
||||
#define FLOAT_UNOP_LIST(V) \
|
||||
V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \
|
||||
@ -1233,9 +1233,9 @@ RR_OP_LIST(RR_VISITOR)
|
||||
#undef RR_VISITOR
|
||||
#undef RR_OP_LIST
|
||||
|
||||
#define RRO_FLOAT_VISITOR(Name, avx, sse) \
|
||||
#define RRO_FLOAT_VISITOR(Name, opcode) \
|
||||
void InstructionSelector::Visit##Name(Node* node) { \
|
||||
VisitRROFloat(this, node, avx, sse); \
|
||||
VisitRROFloat(this, node, opcode); \
|
||||
}
|
||||
RRO_FLOAT_OP_LIST(RRO_FLOAT_VISITOR)
|
||||
#undef RRO_FLOAT_VISITOR
|
||||
|
@ -321,8 +321,8 @@ INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
|
||||
|
||||
class AddressingModeUnitTest : public InstructionSelectorTest {
|
||||
public:
|
||||
AddressingModeUnitTest() : m(NULL) { Reset(); }
|
||||
~AddressingModeUnitTest() { delete m; }
|
||||
AddressingModeUnitTest() : m(nullptr) { Reset(); }
|
||||
~AddressingModeUnitTest() override { delete m; }
|
||||
|
||||
void Run(Node* base, Node* load_index, Node* store_index,
|
||||
AddressingMode mode) {
|
||||
@ -812,10 +812,10 @@ TEST_F(InstructionSelectorTest, Float64BinopArithmetic) {
|
||||
m.Return(ret);
|
||||
Stream s = m.Build(AVX);
|
||||
ASSERT_EQ(4U, s.size());
|
||||
EXPECT_EQ(kAVXFloat64Add, s[0]->arch_opcode());
|
||||
EXPECT_EQ(kAVXFloat64Mul, s[1]->arch_opcode());
|
||||
EXPECT_EQ(kAVXFloat64Sub, s[2]->arch_opcode());
|
||||
EXPECT_EQ(kAVXFloat64Div, s[3]->arch_opcode());
|
||||
EXPECT_EQ(kFloat64Add, s[0]->arch_opcode());
|
||||
EXPECT_EQ(kFloat64Mul, s[1]->arch_opcode());
|
||||
EXPECT_EQ(kFloat64Sub, s[2]->arch_opcode());
|
||||
EXPECT_EQ(kFloat64Div, s[3]->arch_opcode());
|
||||
}
|
||||
{
|
||||
StreamBuilder m(this, MachineType::Float64(), MachineType::Float64(),
|
||||
@ -827,10 +827,10 @@ TEST_F(InstructionSelectorTest, Float64BinopArithmetic) {
|
||||
m.Return(ret);
|
||||
Stream s = m.Build();
|
||||
ASSERT_EQ(4U, s.size());
|
||||
EXPECT_EQ(kSSEFloat64Add, s[0]->arch_opcode());
|
||||
EXPECT_EQ(kSSEFloat64Mul, s[1]->arch_opcode());
|
||||
EXPECT_EQ(kSSEFloat64Sub, s[2]->arch_opcode());
|
||||
EXPECT_EQ(kSSEFloat64Div, s[3]->arch_opcode());
|
||||
EXPECT_EQ(kFloat64Add, s[0]->arch_opcode());
|
||||
EXPECT_EQ(kFloat64Mul, s[1]->arch_opcode());
|
||||
EXPECT_EQ(kFloat64Sub, s[2]->arch_opcode());
|
||||
EXPECT_EQ(kFloat64Div, s[3]->arch_opcode());
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user