[wasm-simd] Implement f32x4.sqrt and f64x2.sqrt for x64
Implementations for other architectures will follow in subsequent changes.

Bug: v8:8460
Change-Id: I279388ab76b1d88d65cbe179088be5573c17fc58
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1796317
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63693}
This commit is contained in:
parent
93a29bdce0
commit
134e110211
@ -1334,7 +1334,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
impl(opcode, dst, src1, src2); \
|
||||
}
|
||||
|
||||
AVX_SP_3(vsqrt, 0x51)
|
||||
// vsqrtpd is defined by sqrtpd in SSE2_INSTRUCTION_LIST
|
||||
AVX_S_3(vsqrt, 0x51)
|
||||
AVX_3(vsqrtps, 0x51, vps)
|
||||
AVX_S_3(vadd, 0x58)
|
||||
AVX_S_3(vsub, 0x5c)
|
||||
AVX_S_3(vmul, 0x59)
|
||||
|
@ -6,6 +6,7 @@
|
||||
#define V8_CODEGEN_X64_SSE_INSTR_H_
|
||||
|
||||
#define SSE2_INSTRUCTION_LIST(V) \
|
||||
V(sqrtpd, 66, 0F, 51) \
|
||||
V(andnpd, 66, 0F, 55) \
|
||||
V(addpd, 66, 0F, 58) \
|
||||
V(mulpd, 66, 0F, 59) \
|
||||
|
@ -1857,6 +1857,8 @@ void InstructionSelector::VisitNode(Node* node) {
|
||||
return MarkAsSimd128(node), VisitF64x2Abs(node);
|
||||
case IrOpcode::kF64x2Neg:
|
||||
return MarkAsSimd128(node), VisitF64x2Neg(node);
|
||||
case IrOpcode::kF64x2Sqrt:
|
||||
return MarkAsSimd128(node), VisitF64x2Sqrt(node);
|
||||
case IrOpcode::kF64x2Add:
|
||||
return MarkAsSimd128(node), VisitF64x2Add(node);
|
||||
case IrOpcode::kF64x2Sub:
|
||||
@ -1891,6 +1893,8 @@ void InstructionSelector::VisitNode(Node* node) {
|
||||
return MarkAsSimd128(node), VisitF32x4Abs(node);
|
||||
case IrOpcode::kF32x4Neg:
|
||||
return MarkAsSimd128(node), VisitF32x4Neg(node);
|
||||
case IrOpcode::kF32x4Sqrt:
|
||||
return MarkAsSimd128(node), VisitF32x4Sqrt(node);
|
||||
case IrOpcode::kF32x4RecipApprox:
|
||||
return MarkAsSimd128(node), VisitF32x4RecipApprox(node);
|
||||
case IrOpcode::kF32x4RecipSqrtApprox:
|
||||
@ -2641,6 +2645,8 @@ void InstructionSelector::VisitI64x2GeU(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitS1x2AnyTrue(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitS1x2AllTrue(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_ARM64
|
||||
// Stub visitor for the F64x2Sqrt node: no instruction selection is performed
// here; the body only invokes UNIMPLEMENTED().
// NOTE(review): presumably compiled only for targets that lack a native
// lowering (the enclosing preprocessor guard is outside this hunk) — confirm.
void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); }
|
||||
// Stub visitor for the F32x4Sqrt node: no instruction selection is performed
// here; the body only invokes UNIMPLEMENTED().
// NOTE(review): presumably compiled only for targets that lack a native
// lowering (the enclosing preprocessor guard is outside this hunk) — confirm.
void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitI64x2MinS(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitI64x2MaxS(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitI64x2MinU(Node* node) { UNIMPLEMENTED(); }
|
||||
|
@ -2293,6 +2293,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ movq(i.OutputDoubleRegister(), kScratchRegister);
|
||||
break;
|
||||
}
|
||||
case kX64F64x2Sqrt: {
|
||||
__ sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
break;
|
||||
}
|
||||
case kX64F64x2Add: {
|
||||
ASSEMBLE_SSE_BINOP(addpd);
|
||||
break;
|
||||
@ -2445,6 +2449,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kX64F32x4Sqrt: {
|
||||
__ sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
break;
|
||||
}
|
||||
case kX64F32x4RecipApprox: {
|
||||
__ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
break;
|
||||
|
@ -160,6 +160,7 @@ namespace compiler {
|
||||
V(X64F64x2ReplaceLane) \
|
||||
V(X64F64x2Abs) \
|
||||
V(X64F64x2Neg) \
|
||||
V(X64F64x2Sqrt) \
|
||||
V(X64F64x2Add) \
|
||||
V(X64F64x2Sub) \
|
||||
V(X64F64x2Mul) \
|
||||
@ -177,6 +178,7 @@ namespace compiler {
|
||||
V(X64F32x4UConvertI32x4) \
|
||||
V(X64F32x4Abs) \
|
||||
V(X64F32x4Neg) \
|
||||
V(X64F32x4Sqrt) \
|
||||
V(X64F32x4RecipApprox) \
|
||||
V(X64F32x4RecipSqrtApprox) \
|
||||
V(X64F32x4Add) \
|
||||
|
@ -129,6 +129,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64F64x2ReplaceLane:
|
||||
case kX64F64x2Abs:
|
||||
case kX64F64x2Neg:
|
||||
case kX64F64x2Sqrt:
|
||||
case kX64F64x2Add:
|
||||
case kX64F64x2Sub:
|
||||
case kX64F64x2Mul:
|
||||
@ -148,6 +149,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64F32x4RecipSqrtApprox:
|
||||
case kX64F32x4Abs:
|
||||
case kX64F32x4Neg:
|
||||
case kX64F32x4Sqrt:
|
||||
case kX64F32x4Add:
|
||||
case kX64F32x4AddHoriz:
|
||||
case kX64F32x4Sub:
|
||||
|
@ -2685,9 +2685,11 @@ VISIT_ATOMIC_BINOP(Xor)
|
||||
V(I8x16GtU)
|
||||
|
||||
#define SIMD_UNOP_LIST(V) \
|
||||
V(F64x2Sqrt) \
|
||||
V(F32x4SConvertI32x4) \
|
||||
V(F32x4Abs) \
|
||||
V(F32x4Neg) \
|
||||
V(F32x4Sqrt) \
|
||||
V(F32x4RecipApprox) \
|
||||
V(F32x4RecipSqrtApprox) \
|
||||
V(I64x2Neg) \
|
||||
|
@ -255,6 +255,7 @@ MachineType AtomicOpType(Operator const* op) {
|
||||
V(F64x2Splat, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(F64x2Abs, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(F64x2Neg, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(F64x2Sqrt, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(F64x2Add, Operator::kCommutative, 2, 0, 1) \
|
||||
V(F64x2Sub, Operator::kNoProperties, 2, 0, 1) \
|
||||
V(F64x2Mul, Operator::kCommutative, 2, 0, 1) \
|
||||
@ -270,6 +271,7 @@ MachineType AtomicOpType(Operator const* op) {
|
||||
V(F32x4UConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(F32x4Abs, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(F32x4Neg, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(F32x4Sqrt, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(F32x4RecipApprox, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(F32x4RecipSqrtApprox, Operator::kNoProperties, 1, 0, 1) \
|
||||
V(F32x4Add, Operator::kCommutative, 2, 0, 1) \
|
||||
|
@ -482,6 +482,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
|
||||
const Operator* F64x2Splat();
|
||||
const Operator* F64x2Abs();
|
||||
const Operator* F64x2Neg();
|
||||
const Operator* F64x2Sqrt();
|
||||
const Operator* F64x2Add();
|
||||
const Operator* F64x2Sub();
|
||||
const Operator* F64x2Mul();
|
||||
@ -502,6 +503,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
|
||||
const Operator* F32x4UConvertI32x4();
|
||||
const Operator* F32x4Abs();
|
||||
const Operator* F32x4Neg();
|
||||
const Operator* F32x4Sqrt();
|
||||
const Operator* F32x4RecipApprox();
|
||||
const Operator* F32x4RecipSqrtApprox();
|
||||
const Operator* F32x4Add();
|
||||
|
@ -749,6 +749,7 @@
|
||||
V(F64x2ReplaceLane) \
|
||||
V(F64x2Abs) \
|
||||
V(F64x2Neg) \
|
||||
V(F64x2Sqrt) \
|
||||
V(F64x2Add) \
|
||||
V(F64x2Sub) \
|
||||
V(F64x2Mul) \
|
||||
@ -766,6 +767,7 @@
|
||||
V(F32x4UConvertI32x4) \
|
||||
V(F32x4Abs) \
|
||||
V(F32x4Neg) \
|
||||
V(F32x4Sqrt) \
|
||||
V(F32x4RecipApprox) \
|
||||
V(F32x4RecipSqrtApprox) \
|
||||
V(F32x4Add) \
|
||||
|
@ -132,6 +132,7 @@ void SimdScalarLowering::LowerGraph() {
|
||||
V(F32x4UConvertI32x4) \
|
||||
V(F32x4Abs) \
|
||||
V(F32x4Neg) \
|
||||
V(F32x4Sqrt) \
|
||||
V(F32x4RecipApprox) \
|
||||
V(F32x4RecipSqrtApprox) \
|
||||
V(F32x4Add) \
|
||||
@ -1219,6 +1220,7 @@ void SimdScalarLowering::LowerNode(Node* node) {
|
||||
}
|
||||
F32X4_UNOP_CASE(Abs)
|
||||
F32X4_UNOP_CASE(Neg)
|
||||
F32X4_UNOP_CASE(Sqrt)
|
||||
#undef F32X4_UNOP_CASE
|
||||
case IrOpcode::kF32x4RecipApprox:
|
||||
case IrOpcode::kF32x4RecipSqrtApprox: {
|
||||
|
@ -4008,6 +4008,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
|
||||
return graph()->NewNode(mcgraph()->machine()->F64x2Abs(), inputs[0]);
|
||||
case wasm::kExprF64x2Neg:
|
||||
return graph()->NewNode(mcgraph()->machine()->F64x2Neg(), inputs[0]);
|
||||
case wasm::kExprF64x2Sqrt:
|
||||
return graph()->NewNode(mcgraph()->machine()->F64x2Sqrt(), inputs[0]);
|
||||
case wasm::kExprF64x2Add:
|
||||
return graph()->NewNode(mcgraph()->machine()->F64x2Add(), inputs[0],
|
||||
inputs[1]);
|
||||
@ -4056,6 +4058,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
|
||||
return graph()->NewNode(mcgraph()->machine()->F32x4Abs(), inputs[0]);
|
||||
case wasm::kExprF32x4Neg:
|
||||
return graph()->NewNode(mcgraph()->machine()->F32x4Neg(), inputs[0]);
|
||||
case wasm::kExprF32x4Sqrt:
|
||||
return graph()->NewNode(mcgraph()->machine()->F32x4Sqrt(), inputs[0]);
|
||||
case wasm::kExprF32x4RecipApprox:
|
||||
return graph()->NewNode(mcgraph()->machine()->F32x4RecipApprox(),
|
||||
inputs[0]);
|
||||
|
@ -1845,7 +1845,9 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
|
||||
current += 1;
|
||||
} else {
|
||||
const char* mnemonic;
|
||||
if (opcode == 0x54) {
|
||||
if (opcode == 0x51) {
|
||||
mnemonic = "sqrtpd";
|
||||
} else if (opcode == 0x54) {
|
||||
mnemonic = "andpd";
|
||||
} else if (opcode == 0x55) {
|
||||
mnemonic = "andnpd";
|
||||
|
@ -2317,8 +2317,10 @@ class ThreadImpl {
|
||||
}
|
||||
UNOP_CASE(F64x2Abs, f64x2, float2, 2, std::abs(a))
|
||||
UNOP_CASE(F64x2Neg, f64x2, float2, 2, -a)
|
||||
UNOP_CASE(F64x2Sqrt, f64x2, float2, 2, std::sqrt(a))
|
||||
UNOP_CASE(F32x4Abs, f32x4, float4, 4, std::abs(a))
|
||||
UNOP_CASE(F32x4Neg, f32x4, float4, 4, -a)
|
||||
UNOP_CASE(F32x4Sqrt, f32x4, float4, 4, std::sqrt(a))
|
||||
UNOP_CASE(F32x4RecipApprox, f32x4, float4, 4, base::Recip(a))
|
||||
UNOP_CASE(F32x4RecipSqrtApprox, f32x4, float4, 4, base::RecipSqrt(a))
|
||||
UNOP_CASE(I64x2Neg, i64x2, int2, 2, base::NegateWithWraparound(a))
|
||||
|
@ -222,6 +222,8 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
|
||||
CASE_SIMD_OP(Splat, "splat")
|
||||
CASE_SIMD_OP(Neg, "neg")
|
||||
CASE_F64x2_OP(Neg, "neg")
|
||||
CASE_F64x2_OP(Sqrt, "sqrt")
|
||||
CASE_F32x4_OP(Sqrt, "sqrt")
|
||||
CASE_I64x2_OP(Neg, "neg")
|
||||
CASE_SIMD_OP(Eq, "eq")
|
||||
CASE_F64x2_OP(Eq, "eq")
|
||||
|
@ -396,6 +396,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
|
||||
V(I64x2MaxU, 0xfd91, s_ss) \
|
||||
V(F32x4Abs, 0xfd95, s_s) \
|
||||
V(F32x4Neg, 0xfd96, s_s) \
|
||||
V(F32x4Sqrt, 0xfd97, s_s) \
|
||||
V(F32x4RecipApprox, 0xfd98, s_s) \
|
||||
V(F32x4RecipSqrtApprox, 0xfd99, s_s) \
|
||||
V(F32x4Add, 0xfd9a, s_ss) \
|
||||
@ -406,6 +407,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
|
||||
V(F32x4Max, 0xfd9f, s_ss) \
|
||||
V(F64x2Abs, 0xfda0, s_s) \
|
||||
V(F64x2Neg, 0xfda1, s_s) \
|
||||
V(F64x2Sqrt, 0xfda2, s_s) \
|
||||
V(F64x2Add, 0xfda5, s_ss) \
|
||||
V(F64x2Sub, 0xfda6, s_ss) \
|
||||
V(F64x2Mul, 0xfda7, s_ss) \
|
||||
|
@ -593,6 +593,12 @@ WASM_SIMD_TEST(F32x4Neg) {
|
||||
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Neg, Negate);
|
||||
}
|
||||
|
||||
// f32x4.sqrt test, compiled only when V8_TARGET_ARCH_X64 is set. It forwards
// execution_tier and lower_simd to RunF32x4UnOpTest together with the
// kExprF32x4Sqrt opcode and Sqrt.
// NOTE(review): Sqrt is presumably the scalar function used to compute the
// expected lanes — confirm against RunF32x4UnOpTest's signature.
#if V8_TARGET_ARCH_X64
|
||||
WASM_SIMD_TEST(F32x4Sqrt) {
|
||||
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Sqrt, Sqrt);
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_X64
|
||||
|
||||
WASM_SIMD_TEST(F32x4RecipApprox) {
|
||||
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4RecipApprox,
|
||||
base::Recip, false /* !exact */);
|
||||
@ -1124,6 +1130,12 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Neg) {
|
||||
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Neg, Negate);
|
||||
}
|
||||
|
||||
// f64x2.sqrt test, compiled only when V8_TARGET_ARCH_X64 is set. It forwards
// execution_tier and lower_simd to RunF64x2UnOpTest together with the
// kExprF64x2Sqrt opcode and Sqrt.
// NOTE(review): Sqrt is presumably the scalar function used to compute the
// expected lanes — confirm against RunF64x2UnOpTest's signature.
#if V8_TARGET_ARCH_X64
|
||||
WASM_SIMD_TEST_NO_LOWERING(F64x2Sqrt) {
|
||||
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Sqrt, Sqrt);
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_X64
|
||||
|
||||
void RunF64x2BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
|
||||
WasmOpcode opcode, DoubleBinOp expected_op) {
|
||||
WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);
|
||||
|
Loading…
Reference in New Issue
Block a user