[wasm-simd] Implement F64x2 min max for x64

Also add an IsExtreme(double) overload.
The missing overload wasn't causing issues because there was no codepath
which exercised it (only approx operations did).

Change-Id: If7583fb567137c428d16c0d2cdfc37e086f7f3fd
Bug: v8:8460
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1726675
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63053}
This commit is contained in:
Ng Zhi An 2019-07-31 13:13:36 -07:00 committed by Commit Bot
parent 5cf67ad933
commit e17ac92556
16 changed files with 97 additions and 3 deletions

View File

@ -1335,10 +1335,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
AVX_S_3(vsub, 0x5c)
AVX_S_3(vmul, 0x59)
AVX_SP_3(vdiv, 0x5e)
AVX_SP_3(vmin, 0x5d)
AVX_SP_3(vmax, 0x5f)
AVX_S_3(vmin, 0x5d)
AVX_S_3(vmax, 0x5f)
AVX_P_3(vand, 0x54)
AVX_P_3(vandn, 0x55)
AVX_3(vandnps, 0x55, vps)
AVX_P_3(vor, 0x56)
AVX_P_3(vxor, 0x57)
AVX_3(vcvtsd2ss, 0x5a, vsd)

View File

@ -6,10 +6,13 @@
#define V8_CODEGEN_X64_SSE_INSTR_H_
#define SSE2_INSTRUCTION_LIST(V) \
V(andnpd, 66, 0F, 55) \
V(addpd, 66, 0F, 58) \
V(mulpd, 66, 0F, 59) \
V(cvtps2dq, 66, 0F, 5B) \
V(subpd, 66, 0F, 5C) \
V(minpd, 66, 0F, 5D) \
V(maxpd, 66, 0F, 5F) \
V(punpcklbw, 66, 0F, 60) \
V(punpcklwd, 66, 0F, 61) \
V(punpckldq, 66, 0F, 62) \

View File

@ -1833,6 +1833,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF64x2Sub(node);
case IrOpcode::kF64x2Mul:
return MarkAsSimd128(node), VisitF64x2Mul(node);
case IrOpcode::kF64x2Min:
return MarkAsSimd128(node), VisitF64x2Min(node);
case IrOpcode::kF64x2Max:
return MarkAsSimd128(node), VisitF64x2Max(node);
case IrOpcode::kF64x2Eq:
return MarkAsSimd128(node), VisitF64x2Eq(node);
case IrOpcode::kF64x2Ne:
@ -2575,6 +2579,8 @@ void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
// Placeholder visitors for F64x2 operations: each one just invokes
// UNIMPLEMENTED(). NOTE(review): presumably these are only compiled for
// targets without F64x2 support -- the guarding condition is outside this
// hunk, confirm before relying on it.
void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); }

View File

@ -2296,6 +2296,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_SSE_BINOP(mulpd);
break;
}
case kX64F64x2Min: {
// Lane-wise minimum of two F64x2 vectors. The output register must alias
// input 0 (enforced by the DCHECK below); input 1 is left unmodified and
// kScratchDoubleReg is clobbered.
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The minpd instruction doesn't propagate NaNs and +0's in its first
// operand. Perform minpd in both orders, merge the results, and adjust.
// After these three instructions: scratch = minpd(src1, dst) and
// dst = minpd(dst, src1).
__ movapd(kScratchDoubleReg, src1);
__ minpd(kScratchDoubleReg, dst);
__ minpd(dst, src1);
// Propagate -0's and NaNs, which may be non-canonical.
__ orpd(kScratchDoubleReg, dst);
// Canonicalize NaNs by quieting and clearing the payload.
// cmppd with predicate 3 (unordered) leaves an all-ones mask in dst for
// every lane where either operand is NaN, and zero elsewhere.
__ cmppd(dst, kScratchDoubleReg, 3);
// In NaN lanes, set every bit of the scratch value (this also sets the
// quiet bit); other lanes are unchanged because the mask is zero there.
__ orpd(kScratchDoubleReg, dst);
// Shift the mask right by 13 so that, in NaN lanes, only the low 51 bits
// (the payload below the quiet bit) remain set.
__ psrlq(dst, 13);
// dst = ~dst & scratch: clears the payload bits in NaN lanes and passes the
// merged minimum through unchanged in all other lanes.
__ andnpd(dst, kScratchDoubleReg);
break;
}
case kX64F64x2Max: {
// Lane-wise maximum of two F64x2 vectors. The output register must alias
// input 0 (enforced by the DCHECK below); input 1 is left unmodified and
// kScratchDoubleReg is clobbered.
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The maxpd instruction doesn't propagate NaNs and +0's in its first
// operand. Perform maxpd in both orders, merge the results, and adjust.
// After these three instructions: scratch = maxpd(src1, dst) and
// dst = maxpd(dst, src1).
__ movapd(kScratchDoubleReg, src1);
__ maxpd(kScratchDoubleReg, dst);
__ maxpd(dst, src1);
// Find discrepancies.
// dst now holds the bits in which the two orderings disagree.
__ xorpd(dst, kScratchDoubleReg);
// Propagate NaNs, which may be non-canonical.
__ orpd(kScratchDoubleReg, dst);
// Propagate sign discrepancy and (subtle) quiet NaNs.
__ subpd(kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
// cmppd with predicate 3 (unordered) leaves an all-ones mask in dst for
// every lane where either operand is NaN, and zero elsewhere.
__ cmppd(dst, kScratchDoubleReg, 3);
// Shift the mask right by 13 so that, in NaN lanes, only the low 51 bits
// (the payload below the quiet bit) remain set.
__ psrlq(dst, 13);
// dst = ~dst & scratch: clears the payload bits in NaN lanes and passes the
// computed maximum through unchanged in all other lanes.
__ andnpd(dst, kScratchDoubleReg);
break;
}
case kX64F64x2Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));

View File

@ -167,6 +167,8 @@ namespace compiler {
V(X64F64x2Add) \
V(X64F64x2Sub) \
V(X64F64x2Mul) \
V(X64F64x2Min) \
V(X64F64x2Max) \
V(X64F64x2Eq) \
V(X64F64x2Ne) \
V(X64F64x2Lt) \

View File

@ -132,6 +132,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2Add:
case kX64F64x2Sub:
case kX64F64x2Mul:
case kX64F64x2Min:
case kX64F64x2Max:
case kX64F64x2Eq:
case kX64F64x2Ne:
case kX64F64x2Lt:

View File

@ -2590,6 +2590,8 @@ VISIT_ATOMIC_BINOP(Xor)
V(F64x2Add) \
V(F64x2Sub) \
V(F64x2Mul) \
V(F64x2Min) \
V(F64x2Max) \
V(F64x2Eq) \
V(F64x2Ne) \
V(F64x2Lt) \

View File

@ -251,6 +251,8 @@ MachineType AtomicOpType(Operator const* op) {
V(F64x2Add, Operator::kCommutative, 2, 0, 1) \
V(F64x2Sub, Operator::kNoProperties, 2, 0, 1) \
V(F64x2Mul, Operator::kCommutative, 2, 0, 1) \
V(F64x2Min, Operator::kCommutative, 2, 0, 1) \
V(F64x2Max, Operator::kCommutative, 2, 0, 1) \
V(F64x2Eq, Operator::kCommutative, 2, 0, 1) \
V(F64x2Ne, Operator::kCommutative, 2, 0, 1) \
V(F64x2Lt, Operator::kNoProperties, 2, 0, 1) \

View File

@ -475,6 +475,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F64x2Sub();
const Operator* F64x2Mul();
const Operator* F64x2ExtractLane(int32_t);
const Operator* F64x2Min();
const Operator* F64x2Max();
const Operator* F64x2ReplaceLane(int32_t);
const Operator* F64x2Eq();
const Operator* F64x2Ne();

View File

@ -749,6 +749,8 @@
V(F64x2Add) \
V(F64x2Sub) \
V(F64x2Mul) \
V(F64x2Min) \
V(F64x2Max) \
V(F64x2Eq) \
V(F64x2Ne) \
V(F64x2Lt) \

View File

@ -4013,6 +4013,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprF64x2Mul:
return graph()->NewNode(mcgraph()->machine()->F64x2Mul(), inputs[0],
inputs[1]);
case wasm::kExprF64x2Min:
return graph()->NewNode(mcgraph()->machine()->F64x2Min(), inputs[0],
inputs[1]);
case wasm::kExprF64x2Max:
return graph()->NewNode(mcgraph()->machine()->F64x2Max(), inputs[0],
inputs[1]);
case wasm::kExprF64x2Eq:
return graph()->NewNode(mcgraph()->machine()->F64x2Eq(), inputs[0],
inputs[1]);

View File

@ -1847,6 +1847,8 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
const char* mnemonic;
if (opcode == 0x54) {
mnemonic = "andpd";
} else if (opcode == 0x55) {
mnemonic = "andnpd";
} else if (opcode == 0x56) {
mnemonic = "orpd";
} else if (opcode == 0x57) {
@ -1859,6 +1861,10 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
mnemonic = "cvtps2dq";
} else if (opcode == 0x5C) {
mnemonic = "subpd";
} else if (opcode == 0x5D) {
mnemonic = "minpd";
} else if (opcode == 0x5F) {
mnemonic = "maxpd";
} else if (opcode == 0x60) {
mnemonic = "punpcklbw";
} else if (opcode == 0x61) {

View File

@ -2244,6 +2244,8 @@ class ThreadImpl {
BINOP_CASE(F64x2Add, f64x2, float2, 2, a + b)
BINOP_CASE(F64x2Sub, f64x2, float2, 2, a - b)
BINOP_CASE(F64x2Mul, f64x2, float2, 2, a * b)
BINOP_CASE(F64x2Min, f64x2, float2, 2, JSMin(a, b))
BINOP_CASE(F64x2Max, f64x2, float2, 2, JSMax(a, b))
BINOP_CASE(F32x4Add, f32x4, float4, 4, a + b)
BINOP_CASE(F32x4Sub, f32x4, float4, 4, a - b)
BINOP_CASE(F32x4Mul, f32x4, float4, 4, a * b)

View File

@ -247,7 +247,9 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_F32x4_OP(AddHoriz, "add_horizontal")
CASE_F32x4_OP(RecipApprox, "recip_approx")
CASE_F32x4_OP(RecipSqrtApprox, "recip_sqrt_approx")
CASE_F64x2_OP(Min, "min")
CASE_F32x4_OP(Min, "min")
CASE_F64x2_OP(Max, "max")
CASE_F32x4_OP(Max, "max")
CASE_F32x4_OP(Lt, "lt")
CASE_F32x4_OP(Le, "le")

View File

@ -406,6 +406,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
V(F64x2Add, 0xfda5, s_ss) \
V(F64x2Sub, 0xfda6, s_ss) \
V(F64x2Mul, 0xfda7, s_ss) \
V(F64x2Min, 0xfda9, s_ss) \
V(F64x2Max, 0xfdaa, s_ss) \
V(I32x4SConvertF32x4, 0xfdab, s_s) \
V(I32x4UConvertF32x4, 0xfdac, s_s) \
V(F32x4SConvertI32x4, 0xfdaf, s_s) \

View File

@ -1038,6 +1038,14 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Le) {
RunF64x2CompareOpTest(execution_tier, lower_simd, kExprF64x2Le, LessEqual);
}
// Reports whether |x| is a nonzero double whose magnitude is either below
// 1e-298 or above 1e298. Both +0 and -0 are never extreme, and NaN yields
// false because every ordered comparison against it fails.
bool IsExtreme(double x) {
  constexpr double kTinyMagnitude = 1.0e-298;
  constexpr double kHugeMagnitude = 1.0e298;
  const double magnitude = std::fabs(x);
  // Zero of either sign is always acceptable.
  if (magnitude == 0.0) return false;
  return magnitude < kTinyMagnitude || magnitude > kHugeMagnitude;
}
bool IsSameNan(double expected, double actual) {
// Sign is non-deterministic.
uint64_t expected_bits = bit_cast<uint64_t>(expected) & ~0x8000000000000000;
@ -1209,6 +1217,14 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Mul) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Mul, Mul);
}
// Runs the shared F64x2 binary-op test driver for kExprF64x2Min, using JSMin
// as the expected-value function.
WASM_SIMD_TEST_NO_LOWERING(F64x2Min) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Min, JSMin);
}
// Runs the shared F64x2 binary-op test driver for kExprF64x2Max, using JSMax
// as the expected-value function.
WASM_SIMD_TEST_NO_LOWERING(F64x2Max) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Max, JSMax);
}
#undef FOR_FLOAT64_NAN_INPUTS
WASM_SIMD_TEST_NO_LOWERING(I64x2ExtractWithF64x2) {