[wasm-simd] F32x4Div for ia32 x64 arm arm64

Bug: v8:8460
Change-Id: I529310a35b74964cb034b4c757981c7ec70f1d19
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1765442
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63532}
This commit is contained in:
Ng Zhi An 2019-08-23 09:47:41 -07:00 committed by Commit Bot
parent 870600ecca
commit 85e2dbb32a
26 changed files with 70 additions and 0 deletions

View File

@ -1839,6 +1839,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kArmF32x4Div: {
  // Lane-wise F32x4 division, emitted as four scalar vdiv instructions
  // (one per 32-bit lane) on the S registers addressed through the Q
  // register codes.
  const QwNeonRegister quotient = i.OutputSimd128Register();
  const QwNeonRegister dividend = i.InputSimd128Register(0);
  const QwNeonRegister divisor = i.InputSimd128Register(1);
  // The operands are expected to be pinned: output and first input in q0,
  // second input in q1.
  DCHECK_EQ(quotient, q0);
  DCHECK_EQ(dividend, q0);
  DCHECK_EQ(divisor, q1);
  // S register for a given lane of Q register `q`: code(q) * 4 + lane.
  const auto lane_reg = [](QwNeonRegister q, int lane) {
    return SwVfpRegister::from_code(q.code() * 4 + lane);
  };
  for (int lane = 0; lane < 4; ++lane) {
    __ vdiv(lane_reg(quotient, lane), lane_reg(dividend, lane),
            lane_reg(divisor, lane));
  }
  break;
}
case kArmF32x4Min: {
__ vmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));

View File

@ -141,6 +141,7 @@ namespace compiler {
V(ArmF32x4AddHoriz) \
V(ArmF32x4Sub) \
V(ArmF32x4Mul) \
V(ArmF32x4Div) \
V(ArmF32x4Min) \
V(ArmF32x4Max) \
V(ArmF32x4Eq) \

View File

@ -121,6 +121,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF32x4AddHoriz:
case kArmF32x4Sub:
case kArmF32x4Mul:
case kArmF32x4Div:
case kArmF32x4Min:
case kArmF32x4Max:
case kArmF32x4Eq:

View File

@ -2513,6 +2513,14 @@ SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
#undef SIMD_VISIT_BINOP
#undef SIMD_BINOP_LIST
// Selects kArmF32x4Div for an F32x4Div node, pinning every operand to a
// fixed Q register.
void InstructionSelector::VisitF32x4Div(Node* node) {
  ArmOperandGenerator g(this);
  // Fixed registers from the lower 8 Q-registers are used so that the mapped
  // registers S0-S31 can be accessed directly.
  InstructionOperand quotient = g.DefineAsFixed(node, q0);
  InstructionOperand dividend = g.UseFixed(node->InputAt(0), q0);
  InstructionOperand divisor = g.UseFixed(node->InputAt(1), q1);
  Emit(kArmF32x4Div, quotient, dividend, divisor);
}
void InstructionSelector::VisitS128Select(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmS128Select, g.DefineSameAsFirst(node),

View File

@ -1846,6 +1846,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_BINOP_CASE(kArm64F32x4AddHoriz, Faddp, 4S);
SIMD_BINOP_CASE(kArm64F32x4Sub, Fsub, 4S);
SIMD_BINOP_CASE(kArm64F32x4Mul, Fmul, 4S);
SIMD_BINOP_CASE(kArm64F32x4Div, Fdiv, 4S);
SIMD_BINOP_CASE(kArm64F32x4Min, Fmin, 4S);
SIMD_BINOP_CASE(kArm64F32x4Max, Fmax, 4S);
SIMD_BINOP_CASE(kArm64F32x4Eq, Fcmeq, 4S);

View File

@ -198,6 +198,7 @@ namespace compiler {
V(Arm64F32x4AddHoriz) \
V(Arm64F32x4Sub) \
V(Arm64F32x4Mul) \
V(Arm64F32x4Div) \
V(Arm64F32x4Min) \
V(Arm64F32x4Max) \
V(Arm64F32x4Eq) \

View File

@ -165,6 +165,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F32x4AddHoriz:
case kArm64F32x4Sub:
case kArm64F32x4Mul:
case kArm64F32x4Div:
case kArm64F32x4Min:
case kArm64F32x4Max:
case kArm64F32x4Eq:

View File

@ -3113,6 +3113,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F32x4AddHoriz, kArm64F32x4AddHoriz) \
V(F32x4Sub, kArm64F32x4Sub) \
V(F32x4Mul, kArm64F32x4Mul) \
V(F32x4Div, kArm64F32x4Div) \
V(F32x4Min, kArm64F32x4Min) \
V(F32x4Max, kArm64F32x4Max) \
V(F32x4Eq, kArm64F32x4Eq) \

View File

@ -2004,6 +2004,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1));
break;
}
case kSSEF32x4Div: {
  // SSE form: divps takes the destination as its first source operand, so
  // the output register must be the same register as input 0.
  XMMRegister quotient = i.OutputSimd128Register();
  DCHECK_EQ(quotient, i.InputSimd128Register(0));
  __ divps(quotient, i.InputOperand(1));
  break;
}
case kAVXF32x4Div: {
  // AVX form: vdivps is called with a separate destination and two sources;
  // no same-as-first-input constraint is asserted here.
  CpuFeatureScope avx_scope(tasm(), AVX);
  XMMRegister quotient = i.OutputSimd128Register();
  XMMRegister dividend = i.InputSimd128Register(0);
  __ vdivps(quotient, dividend, i.InputOperand(1));
  break;
}
case kSSEF32x4Min: {
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();

View File

@ -139,6 +139,8 @@ namespace compiler {
V(AVXF32x4Sub) \
V(SSEF32x4Mul) \
V(AVXF32x4Mul) \
V(SSEF32x4Div) \
V(AVXF32x4Div) \
V(SSEF32x4Min) \
V(AVXF32x4Min) \
V(SSEF32x4Max) \

View File

@ -120,6 +120,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXF32x4Sub:
case kSSEF32x4Mul:
case kAVXF32x4Mul:
case kSSEF32x4Div:
case kAVXF32x4Div:
case kSSEF32x4Min:
case kAVXF32x4Min:
case kSSEF32x4Max:

View File

@ -1891,6 +1891,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
V(F32x4Min) \
V(F32x4Max) \
V(F32x4Eq) \

View File

@ -1903,6 +1903,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF32x4Sub(node);
case IrOpcode::kF32x4Mul:
return MarkAsSimd128(node), VisitF32x4Mul(node);
case IrOpcode::kF32x4Div:
return MarkAsSimd128(node), VisitF32x4Div(node);
case IrOpcode::kF32x4Min:
return MarkAsSimd128(node), VisitF32x4Min(node);
case IrOpcode::kF32x4Max:

View File

@ -2691,6 +2691,8 @@ void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); }
// Placeholder visitor for F32x4Mul: no instruction is selected here; reaching
// this function hits UNIMPLEMENTED().
// NOTE(review): presumably compiled only for targets without their own
// implementation -- confirm against the surrounding preprocessor guard.
void InstructionSelector::VisitF32x4Mul(Node* node) {
  UNIMPLEMENTED();
}
// Placeholder visitor for F32x4Div: no instruction is selected here; reaching
// this function hits UNIMPLEMENTED().
// NOTE(review): presumably compiled only for targets without their own
// implementation -- confirm against the surrounding preprocessor guard.
void InstructionSelector::VisitF32x4Div(Node* node) {
  UNIMPLEMENTED();
}
// Placeholder visitor for F32x4Min: no instruction is selected here; reaching
// this function hits UNIMPLEMENTED().
// NOTE(review): presumably compiled only for targets without their own
// implementation -- confirm against the surrounding preprocessor guard.
void InstructionSelector::VisitF32x4Min(Node* node) {
  UNIMPLEMENTED();
}
// Placeholder visitor for F32x4Max: no instruction is selected here; reaching
// this function hits UNIMPLEMENTED().
// NOTE(review): presumably compiled only for targets without their own
// implementation -- confirm against the surrounding preprocessor guard.
void InstructionSelector::VisitF32x4Max(Node* node) {
  UNIMPLEMENTED();
}

View File

@ -2474,6 +2474,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Div: {
  // divps takes the destination as its first source operand, so the output
  // register must be the same register as input 0.
  XMMRegister quotient = i.OutputSimd128Register();
  DCHECK_EQ(quotient, i.InputSimd128Register(0));
  __ divps(quotient, i.InputSimd128Register(1));
  break;
}
case kX64F32x4Min: {
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();

View File

@ -183,6 +183,7 @@ namespace compiler {
V(X64F32x4AddHoriz) \
V(X64F32x4Sub) \
V(X64F32x4Mul) \
V(X64F32x4Div) \
V(X64F32x4Min) \
V(X64F32x4Max) \
V(X64F32x4Eq) \

View File

@ -152,6 +152,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4AddHoriz:
case kX64F32x4Sub:
case kX64F32x4Mul:
case kX64F32x4Div:
case kX64F32x4Min:
case kX64F32x4Max:
case kX64F32x4Eq:

View File

@ -2613,6 +2613,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
V(F32x4Min) \
V(F32x4Max) \
V(F32x4Eq) \

View File

@ -276,6 +276,7 @@ MachineType AtomicOpType(Operator const* op) {
V(F32x4AddHoriz, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Sub, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Mul, Operator::kCommutative, 2, 0, 1) \
V(F32x4Div, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Min, Operator::kCommutative, 2, 0, 1) \
V(F32x4Max, Operator::kCommutative, 2, 0, 1) \
V(F32x4Eq, Operator::kCommutative, 2, 0, 1) \

View File

@ -772,6 +772,7 @@
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
V(F32x4Min) \
V(F32x4Max) \
V(F32x4Eq) \

View File

@ -138,6 +138,7 @@ void SimdScalarLowering::LowerGraph() {
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
V(F32x4Min) \
V(F32x4Max)
@ -1207,6 +1208,7 @@ void SimdScalarLowering::LowerNode(Node* node) {
F32X4_BINOP_CASE(Add)
F32X4_BINOP_CASE(Sub)
F32X4_BINOP_CASE(Mul)
F32X4_BINOP_CASE(Div)
F32X4_BINOP_CASE(Min)
F32X4_BINOP_CASE(Max)
#undef F32X4_BINOP_CASE

View File

@ -4074,6 +4074,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprF32x4Mul:
return graph()->NewNode(mcgraph()->machine()->F32x4Mul(), inputs[0],
inputs[1]);
case wasm::kExprF32x4Div:
return graph()->NewNode(mcgraph()->machine()->F32x4Div(), inputs[0],
inputs[1]);
case wasm::kExprF32x4Min:
return graph()->NewNode(mcgraph()->machine()->F32x4Min(), inputs[0],
inputs[1]);

View File

@ -2250,6 +2250,7 @@ class ThreadImpl {
BINOP_CASE(F32x4Add, f32x4, float4, 4, a + b)
BINOP_CASE(F32x4Sub, f32x4, float4, 4, a - b)
BINOP_CASE(F32x4Mul, f32x4, float4, 4, a * b)
BINOP_CASE(F32x4Div, f32x4, float4, 4, a / b)
BINOP_CASE(F32x4Min, f32x4, float4, 4, JSMin(a, b))
BINOP_CASE(F32x4Max, f32x4, float4, 4, JSMax(a, b))
BINOP_CASE(I64x2Add, i64x2, int2, 2, base::AddWithWraparound(a, b))

View File

@ -239,6 +239,7 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_F64x2_OP(Mul, "mul")
CASE_I64x2_OP(Mul, "mul")
CASE_F64x2_OP(Div, "div")
CASE_F32x4_OP(Div, "div")
CASE_F64x2_OP(Splat, "splat")
CASE_F64x2_OP(Lt, "lt")
CASE_F64x2_OP(Le, "le")

View File

@ -401,6 +401,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(F32x4Add, 0xfd9a, s_ss) \
V(F32x4Sub, 0xfd9b, s_ss) \
V(F32x4Mul, 0xfd9c, s_ss) \
V(F32x4Div, 0xfd9d, s_ss) \
V(F32x4Min, 0xfd9e, s_ss) \
V(F32x4Max, 0xfd9f, s_ss) \
V(F64x2Abs, 0xfda0, s_s) \

View File

@ -660,6 +660,9 @@ WASM_SIMD_TEST(F32x4Sub) {
// Runs the shared F32x4 binary-op test driver for the kExprF32x4Mul opcode,
// passing Mul as the reference function.
// NOTE(review): execution_tier and lower_simd are not declared in this body;
// presumably they are supplied by the WASM_SIMD_TEST macro -- confirm.
WASM_SIMD_TEST(F32x4Mul) {
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Mul, Mul);
}
// Runs the shared F32x4 binary-op test driver for the kExprF32x4Div opcode,
// passing Div as the reference function.
// NOTE(review): Div is defined outside this view; presumably it is a scalar
// float division helper -- confirm. execution_tier and lower_simd are
// presumably supplied by the WASM_SIMD_TEST macro -- confirm.
WASM_SIMD_TEST(F32x4Div) {
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Div, Div);
}
// Runs the shared F32x4 binary-op test driver for the kExprF32x4Min opcode,
// passing JSMin as the reference function.
// NOTE(review): execution_tier and lower_simd are not declared in this body;
// presumably they are supplied by the WASM_SIMD_TEST macro -- confirm.
WASM_SIMD_TEST(F32x4Min) {
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Min, JSMin);
}