[wasm-simd] Implement F64x2 min max for x64
Also add an IsExtreme(double) overload. This wasn't causing issues because
there was no codepath which exercised it (only approx operations did).

Change-Id: If7583fb567137c428d16c0d2cdfc37e086f7f3fd
Bug: v8:8460
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1726675
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63053}
parent 5cf67ad933
commit e17ac92556
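For orientation: the lanewise f64x2.min/max that this CL implements must propagate NaNs and must order -0 below +0, which plain minpd/maxpd do not do on their own; it is the same per-lane contract the interpreter expresses with JSMin/JSMax further down. A minimal scalar sketch of that contract (the helper names are mine, and I return the canonical quiet NaN that the x64 sequence below also produces):

#include <cmath>
#include <limits>

// Per-lane reference for f64x2.min: NaN-propagating, and -0 counts as smaller than +0.
double F64x2MinLaneRef(double a, double b) {
  if (std::isnan(a) || std::isnan(b))
    return std::numeric_limits<double>::quiet_NaN();
  if (a == 0.0 && b == 0.0) return std::signbit(a) ? a : b;  // prefer -0
  return a < b ? a : b;
}

// Per-lane reference for f64x2.max: NaN-propagating, and +0 counts as larger than -0.
double F64x2MaxLaneRef(double a, double b) {
  if (std::isnan(a) || std::isnan(b))
    return std::numeric_limits<double>::quiet_NaN();
  if (a == 0.0 && b == 0.0) return std::signbit(a) ? b : a;  // prefer +0
  return a > b ? a : b;
}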
@@ -1335,10 +1335,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
  AVX_S_3(vsub, 0x5c)
  AVX_S_3(vmul, 0x59)
  AVX_SP_3(vdiv, 0x5e)
  AVX_SP_3(vmin, 0x5d)
  AVX_SP_3(vmax, 0x5f)
  AVX_S_3(vmin, 0x5d)
  AVX_S_3(vmax, 0x5f)
  AVX_P_3(vand, 0x54)
  AVX_P_3(vandn, 0x55)
  AVX_3(vandnps, 0x55, vps)
  AVX_P_3(vor, 0x56)
  AVX_P_3(vxor, 0x57)
  AVX_3(vcvtsd2ss, 0x5a, vsd)
@@ -6,10 +6,13 @@
#define V8_CODEGEN_X64_SSE_INSTR_H_

#define SSE2_INSTRUCTION_LIST(V) \
  V(andnpd, 66, 0F, 55)          \
  V(addpd, 66, 0F, 58)           \
  V(mulpd, 66, 0F, 59)           \
  V(cvtps2dq, 66, 0F, 5B)        \
  V(subpd, 66, 0F, 5C)           \
  V(minpd, 66, 0F, 5D)           \
  V(maxpd, 66, 0F, 5F)           \
  V(punpcklbw, 66, 0F, 60)       \
  V(punpcklwd, 66, 0F, 61)       \
  V(punpckldq, 66, 0F, 62)       \
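Each V(name, prefix, escape, opcode) row records an instruction's encoding: the mandatory 0x66 prefix, the 0x0F escape byte, and the opcode byte, so 66 0F 5D is minpd and 66 0F 5F is maxpd. The assembler and disassembler expand this list with different definitions of V to generate one declaration per row. A self-contained toy illustration of that X-macro pattern (these demo macros are mine, not V8's):

#include <cstdio>

// A two-entry stand-in for SSE2_INSTRUCTION_LIST.
#define DEMO_SSE2_LIST(V) \
  V(minpd, 66, 0F, 5D)    \
  V(maxpd, 66, 0F, 5F)

// One possible expansion: print each instruction's encoding bytes.
#define PRINT_ENTRY(name, prefix, escape, opcode) \
  std::printf("%-6s -> %s %s %s\n", #name, #prefix, #escape, #opcode);

int main() {
  DEMO_SSE2_LIST(PRINT_ENTRY)
  return 0;
}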
@@ -1833,6 +1833,10 @@ void InstructionSelector::VisitNode(Node* node) {
      return MarkAsSimd128(node), VisitF64x2Sub(node);
    case IrOpcode::kF64x2Mul:
      return MarkAsSimd128(node), VisitF64x2Mul(node);
    case IrOpcode::kF64x2Min:
      return MarkAsSimd128(node), VisitF64x2Min(node);
    case IrOpcode::kF64x2Max:
      return MarkAsSimd128(node), VisitF64x2Max(node);
    case IrOpcode::kF64x2Eq:
      return MarkAsSimd128(node), VisitF64x2Eq(node);
    case IrOpcode::kF64x2Ne:
@@ -2575,6 +2579,8 @@ void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); }
@@ -2296,6 +2296,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      ASSEMBLE_SSE_BINOP(mulpd);
      break;
    }
    case kX64F64x2Min: {
      XMMRegister src1 = i.InputSimd128Register(1),
                  dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      // The minpd instruction doesn't propagate NaNs and +0's in its first
      // operand. Perform minpd in both orders, merge the results, and adjust.
      __ movapd(kScratchDoubleReg, src1);
      __ minpd(kScratchDoubleReg, dst);
      __ minpd(dst, src1);
      // Propagate -0's and NaNs, which may be non-canonical.
      __ orpd(kScratchDoubleReg, dst);
      // Canonicalize NaNs by quieting and clearing the payload.
      __ cmppd(dst, kScratchDoubleReg, 3);
      __ orpd(kScratchDoubleReg, dst);
      __ psrlq(dst, 13);
      __ andnpd(dst, kScratchDoubleReg);
      break;
    }
    case kX64F64x2Max: {
      XMMRegister src1 = i.InputSimd128Register(1),
                  dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      // The maxpd instruction doesn't propagate NaNs and +0's in its first
      // operand. Perform maxpd in both orders, merge the results, and adjust.
      __ movapd(kScratchDoubleReg, src1);
      __ maxpd(kScratchDoubleReg, dst);
      __ maxpd(dst, src1);
      // Find discrepancies.
      __ xorpd(dst, kScratchDoubleReg);
      // Propagate NaNs, which may be non-canonical.
      __ orpd(kScratchDoubleReg, dst);
      // Propagate sign discrepancy and (subtle) quiet NaNs.
      __ subpd(kScratchDoubleReg, dst);
      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
      __ cmppd(dst, kScratchDoubleReg, 3);
      __ psrlq(dst, 13);
      __ andnpd(dst, kScratchDoubleReg);
      break;
    }
    case kX64F64x2Eq: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
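Why minpd runs twice: the x86 instruction returns its second operand whenever the operands are unordered (a NaN is involved) or compare equal (which is how -0 and +0 compare), so a single minpd is neither NaN-propagating nor -0-aware. Below is a per-lane scalar model of the kX64F64x2Min sequence above; it is a sketch of my own, and the Bits/FromBits helpers are illustrative rather than anything from V8.

#include <cstdint>
#include <cstring>

// Bitwise views of a double, standing in for one packed XMM lane.
static uint64_t Bits(double v) {
  uint64_t b;
  std::memcpy(&b, &v, sizeof b);
  return b;
}
static double FromBits(uint64_t b) {
  double v;
  std::memcpy(&v, &b, sizeof v);
  return v;
}

// x86 minpd lane rule: return the first operand only if it is strictly
// smaller; on NaN or equality (e.g. -0 vs +0) return the second operand.
static double X86MinLane(double a, double b) { return a < b ? a : b; }

// Per-lane model of the emitted kX64F64x2Min sequence (dst = input 0,
// src1 = input 1).
double ModelF64x2MinLane(double dst, double src1) {
  double scratch = src1;               // movapd kScratchDoubleReg, src1
  scratch = X86MinLane(scratch, dst);  // minpd  kScratchDoubleReg, dst
  dst = X86MinLane(dst, src1);         // minpd  dst, src1
  // orpd: bitwise OR keeps a -0 sign bit and any NaN bits from either order.
  uint64_t merged = Bits(scratch) | Bits(dst);
  // cmppd(dst, kScratchDoubleReg, 3) = UNORD: all-ones mask if either is NaN.
  uint64_t mask = 0;
  if (dst != dst || FromBits(merged) != FromBits(merged)) mask = ~uint64_t{0};
  // orpd: where unordered, force the merged value to all-ones (a NaN).
  merged |= mask;
  // psrlq 13 + andnpd: clear everything below the quiet bit, leaving a
  // canonical NaN (whose sign is whatever fell out, hence "non-deterministic").
  mask >>= 13;
  return FromBits(~mask & merged);
}

The kX64F64x2Max case follows the same shape, using xorpd to find where the two orders disagree and subpd to resolve the +0/-0 sign discrepancy before the same canonicalization tail.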
@@ -167,6 +167,8 @@ namespace compiler {
  V(X64F64x2Add) \
  V(X64F64x2Sub) \
  V(X64F64x2Mul) \
  V(X64F64x2Min) \
  V(X64F64x2Max) \
  V(X64F64x2Eq) \
  V(X64F64x2Ne) \
  V(X64F64x2Lt) \
@@ -132,6 +132,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kX64F64x2Add:
    case kX64F64x2Sub:
    case kX64F64x2Mul:
    case kX64F64x2Min:
    case kX64F64x2Max:
    case kX64F64x2Eq:
    case kX64F64x2Ne:
    case kX64F64x2Lt:
@@ -2590,6 +2590,8 @@ VISIT_ATOMIC_BINOP(Xor)
  V(F64x2Add) \
  V(F64x2Sub) \
  V(F64x2Mul) \
  V(F64x2Min) \
  V(F64x2Max) \
  V(F64x2Eq) \
  V(F64x2Ne) \
  V(F64x2Lt) \
@@ -251,6 +251,8 @@ MachineType AtomicOpType(Operator const* op) {
  V(F64x2Add, Operator::kCommutative, 2, 0, 1) \
  V(F64x2Sub, Operator::kNoProperties, 2, 0, 1) \
  V(F64x2Mul, Operator::kCommutative, 2, 0, 1) \
  V(F64x2Min, Operator::kCommutative, 2, 0, 1) \
  V(F64x2Max, Operator::kCommutative, 2, 0, 1) \
  V(F64x2Eq, Operator::kCommutative, 2, 0, 1) \
  V(F64x2Ne, Operator::kCommutative, 2, 0, 1) \
  V(F64x2Lt, Operator::kNoProperties, 2, 0, 1) \
@@ -475,6 +475,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
  const Operator* F64x2Sub();
  const Operator* F64x2Mul();
  const Operator* F64x2ExtractLane(int32_t);
  const Operator* F64x2Min();
  const Operator* F64x2Max();
  const Operator* F64x2ReplaceLane(int32_t);
  const Operator* F64x2Eq();
  const Operator* F64x2Ne();
@@ -749,6 +749,8 @@
  V(F64x2Add) \
  V(F64x2Sub) \
  V(F64x2Mul) \
  V(F64x2Min) \
  V(F64x2Max) \
  V(F64x2Eq) \
  V(F64x2Ne) \
  V(F64x2Lt) \
@@ -4013,6 +4013,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
    case wasm::kExprF64x2Mul:
      return graph()->NewNode(mcgraph()->machine()->F64x2Mul(), inputs[0],
                              inputs[1]);
    case wasm::kExprF64x2Min:
      return graph()->NewNode(mcgraph()->machine()->F64x2Min(), inputs[0],
                              inputs[1]);
    case wasm::kExprF64x2Max:
      return graph()->NewNode(mcgraph()->machine()->F64x2Max(), inputs[0],
                              inputs[1]);
    case wasm::kExprF64x2Eq:
      return graph()->NewNode(mcgraph()->machine()->F64x2Eq(), inputs[0],
                              inputs[1]);
@@ -1847,6 +1847,8 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
  const char* mnemonic;
  if (opcode == 0x54) {
    mnemonic = "andpd";
  } else if (opcode == 0x55) {
    mnemonic = "andnpd";
  } else if (opcode == 0x56) {
    mnemonic = "orpd";
  } else if (opcode == 0x57) {
@@ -1859,6 +1861,10 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
    mnemonic = "cvtps2dq";
  } else if (opcode == 0x5C) {
    mnemonic = "subpd";
  } else if (opcode == 0x5D) {
    mnemonic = "minpd";
  } else if (opcode == 0x5F) {
    mnemonic = "maxpd";
  } else if (opcode == 0x60) {
    mnemonic = "punpcklbw";
  } else if (opcode == 0x61) {
@@ -2244,6 +2244,8 @@ class ThreadImpl {
      BINOP_CASE(F64x2Add, f64x2, float2, 2, a + b)
      BINOP_CASE(F64x2Sub, f64x2, float2, 2, a - b)
      BINOP_CASE(F64x2Mul, f64x2, float2, 2, a * b)
      BINOP_CASE(F64x2Min, f64x2, float2, 2, JSMin(a, b))
      BINOP_CASE(F64x2Max, f64x2, float2, 2, JSMax(a, b))
      BINOP_CASE(F32x4Add, f32x4, float4, 4, a + b)
      BINOP_CASE(F32x4Sub, f32x4, float4, 4, a - b)
      BINOP_CASE(F32x4Mul, f32x4, float4, 4, a * b)
@@ -247,7 +247,9 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
    CASE_F32x4_OP(AddHoriz, "add_horizontal")
    CASE_F32x4_OP(RecipApprox, "recip_approx")
    CASE_F32x4_OP(RecipSqrtApprox, "recip_sqrt_approx")
    CASE_F64x2_OP(Min, "min")
    CASE_F32x4_OP(Min, "min")
    CASE_F64x2_OP(Max, "max")
    CASE_F32x4_OP(Max, "max")
    CASE_F32x4_OP(Lt, "lt")
    CASE_F32x4_OP(Le, "le")
@@ -406,6 +406,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
  V(F64x2Add, 0xfda5, s_ss) \
  V(F64x2Sub, 0xfda6, s_ss) \
  V(F64x2Mul, 0xfda7, s_ss) \
  V(F64x2Min, 0xfda9, s_ss) \
  V(F64x2Max, 0xfdaa, s_ss) \
  V(I32x4SConvertF32x4, 0xfdab, s_s) \
  V(I32x4UConvertF32x4, 0xfdac, s_s) \
  V(F32x4SConvertI32x4, 0xfdaf, s_s) \
@@ -1038,6 +1038,14 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Le) {
  RunF64x2CompareOpTest(execution_tier, lower_simd, kExprF64x2Le, LessEqual);
}

bool IsExtreme(double x) {
  double abs_x = std::fabs(x);
  const double kSmallFloatThreshold = 1.0e-298;
  const double kLargeFloatThreshold = 1.0e298;
  return abs_x != 0.0f &&  // 0 or -0 are fine.
         (abs_x < kSmallFloatThreshold || abs_x > kLargeFloatThreshold);
}

bool IsSameNan(double expected, double actual) {
  // Sign is non-deterministic.
  uint64_t expected_bits = bit_cast<uint64_t>(expected) & ~0x8000000000000000;
@@ -1209,6 +1217,14 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Mul) {
  RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Mul, Mul);
}

WASM_SIMD_TEST_NO_LOWERING(F64x2Min) {
  RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Min, JSMin);
}

WASM_SIMD_TEST_NO_LOWERING(F64x2Max) {
  RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Max, JSMax);
}

#undef FOR_FLOAT64_NAN_INPUTS

WASM_SIMD_TEST_NO_LOWERING(I64x2ExtractWithF64x2) {
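Two notes on the test side of this hunk. IsExtreme(double) mirrors the existing float overload and, as the commit message says, is only exercised when checking the approximation opcodes, which is why its absence had gone unnoticed. And because the codegen sequence leaves the sign of a canonicalized NaN non-deterministic, NaN results have to be compared with the sign bit masked off, which is what the IsSameNan context above hints at. A standalone sketch of that kind of check (my own helper, not the actual test harness):

#include <cmath>
#include <cstdint>
#include <cstring>

// Compare two NaNs while ignoring the sign bit, since the x64 sequence
// above produces a canonical NaN with an unspecified sign.
bool SameNanIgnoringSign(double expected, double actual) {
  if (!std::isnan(expected) || !std::isnan(actual)) return false;
  uint64_t e, a;
  std::memcpy(&e, &expected, sizeof e);
  std::memcpy(&a, &actual, sizeof a);
  constexpr uint64_t kSignBit = 0x8000000000000000ull;
  return (e & ~kSignBit) == (a & ~kSignBit);
}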