[ia32] Use AVX for f32/f64 min max mod

Change the code generation for f32/f64 min and max, f64 mod, and f64
silence-NaN to use macro-assembler functions, so that AVX instructions
are emitted when supported.

Rename the opcodes from kSSE* to kIA32*, since they are no longer SSE-specific.

Bug: v8:12148
Change-Id: Iaa2aa54dde9f9b41304394f98b8ed18dbb65715b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3158679
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76825}
This commit is contained in:
Ng Zhi An 2021-09-13 15:38:38 -07:00 committed by V8 LUCI CQ
parent 1733097759
commit 8b8e61c089
4 changed files with 56 additions and 56 deletions

View File

@ -1271,63 +1271,63 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kIA32Float64Cmp:
__ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
break;
case kSSEFloat32Max: {
case kIA32Float32Max: {
Label compare_swap, done_compare;
if (instr->InputAt(1)->IsFPRegister()) {
__ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
__ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
__ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
}
auto ool =
zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
__ j(parity_even, ool->entry());
__ j(above, &done_compare, Label::kNear);
__ j(below, &compare_swap, Label::kNear);
__ movmskps(i.TempRegister(0), i.InputDoubleRegister(0));
__ Movmskps(i.TempRegister(0), i.InputDoubleRegister(0));
__ test(i.TempRegister(0), Immediate(1));
__ j(zero, &done_compare, Label::kNear);
__ bind(&compare_swap);
if (instr->InputAt(1)->IsFPRegister()) {
__ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
__ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ movss(i.InputDoubleRegister(0), i.InputOperand(1));
__ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
}
__ bind(&done_compare);
__ bind(ool->exit());
break;
}
case kSSEFloat64Max: {
case kIA32Float64Max: {
Label compare_swap, done_compare;
if (instr->InputAt(1)->IsFPRegister()) {
__ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
__ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
__ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
}
auto ool =
zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
__ j(parity_even, ool->entry());
__ j(above, &done_compare, Label::kNear);
__ j(below, &compare_swap, Label::kNear);
__ movmskpd(i.TempRegister(0), i.InputDoubleRegister(0));
__ Movmskpd(i.TempRegister(0), i.InputDoubleRegister(0));
__ test(i.TempRegister(0), Immediate(1));
__ j(zero, &done_compare, Label::kNear);
__ bind(&compare_swap);
if (instr->InputAt(1)->IsFPRegister()) {
__ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
__ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
__ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
}
__ bind(&done_compare);
__ bind(ool->exit());
break;
}
case kSSEFloat32Min: {
case kIA32Float32Min: {
Label compare_swap, done_compare;
if (instr->InputAt(1)->IsFPRegister()) {
__ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
__ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
__ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
}
auto ool =
zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
@ -1335,29 +1335,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ j(below, &done_compare, Label::kNear);
__ j(above, &compare_swap, Label::kNear);
if (instr->InputAt(1)->IsFPRegister()) {
__ movmskps(i.TempRegister(0), i.InputDoubleRegister(1));
__ Movmskps(i.TempRegister(0), i.InputDoubleRegister(1));
} else {
__ movss(kScratchDoubleReg, i.InputOperand(1));
__ movmskps(i.TempRegister(0), kScratchDoubleReg);
__ Movss(kScratchDoubleReg, i.InputOperand(1));
__ Movmskps(i.TempRegister(0), kScratchDoubleReg);
}
__ test(i.TempRegister(0), Immediate(1));
__ j(zero, &done_compare, Label::kNear);
__ bind(&compare_swap);
if (instr->InputAt(1)->IsFPRegister()) {
__ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
__ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ movss(i.InputDoubleRegister(0), i.InputOperand(1));
__ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
}
__ bind(&done_compare);
__ bind(ool->exit());
break;
}
case kSSEFloat64Min: {
case kIA32Float64Min: {
Label compare_swap, done_compare;
if (instr->InputAt(1)->IsFPRegister()) {
__ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
__ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
__ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
}
auto ool =
zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
@ -1365,32 +1365,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ j(below, &done_compare, Label::kNear);
__ j(above, &compare_swap, Label::kNear);
if (instr->InputAt(1)->IsFPRegister()) {
__ movmskpd(i.TempRegister(0), i.InputDoubleRegister(1));
__ Movmskpd(i.TempRegister(0), i.InputDoubleRegister(1));
} else {
__ movsd(kScratchDoubleReg, i.InputOperand(1));
__ movmskpd(i.TempRegister(0), kScratchDoubleReg);
__ Movsd(kScratchDoubleReg, i.InputOperand(1));
__ Movmskpd(i.TempRegister(0), kScratchDoubleReg);
}
__ test(i.TempRegister(0), Immediate(1));
__ j(zero, &done_compare, Label::kNear);
__ bind(&compare_swap);
if (instr->InputAt(1)->IsFPRegister()) {
__ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
__ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
__ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
}
__ bind(&done_compare);
__ bind(ool->exit());
break;
}
case kSSEFloat64Mod: {
case kIA32Float64Mod: {
Register tmp = i.TempRegister(1);
__ mov(tmp, esp);
__ AllocateStackSpace(kDoubleSize);
__ and_(esp, -8); // align to 8 byte boundary.
// Move values to st(0) and st(1).
__ movsd(Operand(esp, 0), i.InputDoubleRegister(1));
__ Movsd(Operand(esp, 0), i.InputDoubleRegister(1));
__ fld_d(Operand(esp, 0));
__ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
__ Movsd(Operand(esp, 0), i.InputDoubleRegister(0));
__ fld_d(Operand(esp, 0));
// Loop while fprem isn't done.
Label mod_loop;
@ -1406,7 +1406,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Move output to stack and clean up.
__ fstp(1);
__ fstp_d(Operand(esp, 0));
__ movsd(i.OutputDoubleRegister(), Operand(esp, 0));
__ Movsd(i.OutputDoubleRegister(), Operand(esp, 0));
__ mov(esp, tmp);
break;
}
@ -1544,9 +1544,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.TempRegister(0));
break;
}
case kSSEFloat64SilenceNaN:
__ xorps(kScratchDoubleReg, kScratchDoubleReg);
__ subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
case kIA32Float64SilenceNaN:
__ Xorps(kScratchDoubleReg, kScratchDoubleReg);
__ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
break;
case kIA32Movsxbl:
ASSEMBLE_MOVX(movsx_b);

View File

@ -51,11 +51,11 @@ namespace compiler {
V(IA32Float32Sqrt) \
V(IA32Float32Round) \
V(IA32Float64Cmp) \
V(SSEFloat64Mod) \
V(SSEFloat32Max) \
V(SSEFloat64Max) \
V(SSEFloat32Min) \
V(SSEFloat64Min) \
V(IA32Float64Mod) \
V(IA32Float32Max) \
V(IA32Float64Max) \
V(IA32Float32Min) \
V(IA32Float64Min) \
V(IA32Float64Sqrt) \
V(IA32Float64Round) \
V(IA32Float32ToFloat64) \
@ -73,7 +73,7 @@ namespace compiler {
V(IA32Float64InsertLowWord32) \
V(IA32Float64InsertHighWord32) \
V(IA32Float64LoadLowWord32) \
V(SSEFloat64SilenceNaN) \
V(IA32Float64SilenceNaN) \
V(Float32Add) \
V(Float32Sub) \
V(Float64Add) \

View File

@ -52,11 +52,11 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32Float32Sqrt:
case kIA32Float32Round:
case kIA32Float64Cmp:
case kSSEFloat64Mod:
case kSSEFloat32Max:
case kSSEFloat64Max:
case kSSEFloat32Min:
case kSSEFloat64Min:
case kIA32Float64Mod:
case kIA32Float32Max:
case kIA32Float64Max:
case kIA32Float32Min:
case kIA32Float64Min:
case kIA32Float64Sqrt:
case kIA32Float64Round:
case kIA32Float32ToFloat64:
@ -74,7 +74,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32Float64InsertLowWord32:
case kIA32Float64InsertHighWord32:
case kIA32Float64LoadLowWord32:
case kSSEFloat64SilenceNaN:
case kIA32Float64SilenceNaN:
case kFloat32Add:
case kFloat32Sub:
case kFloat64Add:
@ -411,8 +411,8 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
case kFloat64Sub:
case kFloat32Abs:
case kFloat32Neg:
case kSSEFloat64Max:
case kSSEFloat64Min:
case kIA32Float64Max:
case kIA32Float64Min:
case kFloat64Abs:
case kFloat64Neg:
return 5;
@ -441,7 +441,7 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
case kIA32Float32Sqrt:
case kIA32Float64Sqrt:
return 25;
case kSSEFloat64Mod:
case kIA32Float64Mod:
return 50;
case kArchTruncateDoubleToI:
return 9;

View File

@ -1359,7 +1359,7 @@ void InstructionSelector::VisitRoundUint32ToFloat32(Node* node) {
void InstructionSelector::VisitFloat64Mod(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister(eax), g.TempRegister()};
Emit(kSSEFloat64Mod, g.DefineSameAsFirst(node),
Emit(kIA32Float64Mod, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
arraysize(temps), temps);
}
@ -1367,7 +1367,7 @@ void InstructionSelector::VisitFloat64Mod(Node* node) {
void InstructionSelector::VisitFloat32Max(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEFloat32Max, g.DefineSameAsFirst(node),
Emit(kIA32Float32Max, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
arraysize(temps), temps);
}
@ -1375,7 +1375,7 @@ void InstructionSelector::VisitFloat32Max(Node* node) {
void InstructionSelector::VisitFloat64Max(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEFloat64Max, g.DefineSameAsFirst(node),
Emit(kIA32Float64Max, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
arraysize(temps), temps);
}
@ -1383,7 +1383,7 @@ void InstructionSelector::VisitFloat64Max(Node* node) {
void InstructionSelector::VisitFloat32Min(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEFloat32Min, g.DefineSameAsFirst(node),
Emit(kIA32Float32Min, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
arraysize(temps), temps);
}
@ -1391,7 +1391,7 @@ void InstructionSelector::VisitFloat32Min(Node* node) {
void InstructionSelector::VisitFloat64Min(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEFloat64Min, g.DefineSameAsFirst(node),
Emit(kIA32Float64Min, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
arraysize(temps), temps);
}
@ -1987,7 +1987,7 @@ void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) {
void InstructionSelector::VisitFloat64SilenceNaN(Node* node) {
IA32OperandGenerator g(this);
Emit(kSSEFloat64SilenceNaN, g.DefineSameAsFirst(node),
Emit(kIA32Float64SilenceNaN, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)));
}