[wasm-simd][ia32] f64x2 min max remove usage of TempRegister

We have a scratch register (kScratchDoubleReg) available, so we can use it
instead of asking the register allocator for a temporary register. We can
also relax the unique register requirement (UseUniqueRegister becomes
UseRegister), since the codegen is careful not to overwrite src0 or src1.

Bug: chromium:1204071
Change-Id: Ia7775167e323b3bca80f63304687cdbd425af0e5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2873227
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#74392}
This commit is contained in:
Ng Zhi An 2021-05-04 15:54:42 -07:00 committed by V8 LUCI CQ
parent d4ac2dc8bb
commit d39a06a952
2 changed files with 27 additions and 35 deletions

View File

@ -1938,44 +1938,42 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32F64x2Min: {
Operand src1 = i.InputOperand(1);
XMMRegister dst = i.OutputSimd128Register(),
src = i.InputSimd128Register(0),
tmp = i.TempSimd128Register(0);
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
// The minpd instruction doesn't propagate NaNs and +0's in its first
// operand. Perform minpd in both orders, merge the results, and adjust.
__ Movupd(tmp, src1);
__ Minpd(tmp, tmp, src);
__ Minpd(dst, src, src1);
__ Movapd(kScratchDoubleReg, src1);
__ Minpd(kScratchDoubleReg, kScratchDoubleReg, src0);
__ Minpd(dst, src0, src1);
// propagate -0's and NaNs, which may be non-canonical.
__ Orpd(tmp, dst);
__ Orpd(kScratchDoubleReg, dst);
// Canonicalize NaNs by quieting and clearing the payload.
__ Cmpunordpd(dst, dst, tmp);
__ Orpd(tmp, dst);
__ Cmpunordpd(dst, dst, kScratchDoubleReg);
__ Orpd(kScratchDoubleReg, dst);
__ Psrlq(dst, byte{13});
__ Andnpd(dst, tmp);
__ Andnpd(dst, kScratchDoubleReg);
break;
}
case kIA32F64x2Max: {
Operand src1 = i.InputOperand(1);
XMMRegister dst = i.OutputSimd128Register(),
src = i.InputSimd128Register(0),
tmp = i.TempSimd128Register(0);
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
// The maxpd instruction doesn't propagate NaNs and +0's in its first
// operand. Perform maxpd in both orders, merge the results, and adjust.
__ Movupd(tmp, src1);
__ Maxpd(tmp, tmp, src);
__ Maxpd(dst, src, src1);
__ Movapd(kScratchDoubleReg, src1);
__ Maxpd(kScratchDoubleReg, kScratchDoubleReg, src0);
__ Maxpd(dst, src0, src1);
// Find discrepancies.
__ Xorpd(dst, tmp);
__ Xorpd(dst, kScratchDoubleReg);
// Propagate NaNs, which may be non-canonical.
__ Orpd(tmp, dst);
__ Orpd(kScratchDoubleReg, dst);
// Propagate sign discrepancy and (subtle) quiet NaNs.
__ Subpd(tmp, tmp, dst);
__ Subpd(kScratchDoubleReg, kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ Cmpunordpd(dst, dst, tmp);
__ Cmpunordpd(dst, dst, kScratchDoubleReg);
__ Psrlq(dst, byte{13});
__ Andnpd(dst, tmp);
__ Andnpd(dst, kScratchDoubleReg);
break;
}
case kIA32F64x2Eq: {

View File

@ -2354,30 +2354,24 @@ void InstructionSelector::VisitS128Const(Node* node) {
void InstructionSelector::VisitF64x2Min(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempSimd128Register()};
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseRegister(node->InputAt(1));
if (IsSupported(AVX)) {
Emit(kIA32F64x2Min, g.DefineAsRegister(node), operand0, operand1,
arraysize(temps), temps);
Emit(kIA32F64x2Min, g.DefineAsRegister(node), operand0, operand1);
} else {
Emit(kIA32F64x2Min, g.DefineSameAsFirst(node), operand0, operand1,
arraysize(temps), temps);
Emit(kIA32F64x2Min, g.DefineSameAsFirst(node), operand0, operand1);
}
}
void InstructionSelector::VisitF64x2Max(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempSimd128Register()};
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseRegister(node->InputAt(1));
if (IsSupported(AVX)) {
Emit(kIA32F64x2Max, g.DefineAsRegister(node), operand0, operand1,
arraysize(temps), temps);
Emit(kIA32F64x2Max, g.DefineAsRegister(node), operand0, operand1);
} else {
Emit(kIA32F64x2Max, g.DefineSameAsFirst(node), operand0, operand1,
arraysize(temps), temps);
Emit(kIA32F64x2Max, g.DefineSameAsFirst(node), operand0, operand1);
}
}