[wasm-simd] Implement f64x2 add sub mul div for ia32

Bug: v8:9728
Change-Id: Ie769ae0431b7924a4b8f8858681d57e92c00f4b3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1808400
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64118}
Author: Ng Zhi An <zhin@chromium.org>
Date: 2019-10-04 13:49:43 -07:00 (committed by Commit Bot)
commit 8214bea687, parent d05b2d3e3d
7 changed files with 57 additions and 3 deletions

src/codegen/ia32/macro-assembler-ia32.h

@@ -299,6 +299,28 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
 #undef AVX_OP3_XO
 #undef AVX_OP3_WITH_TYPE
+
+// Only use this macro when dst and src1 are the same in the SSE case.
+#define AVX_PACKED_OP3_WITH_TYPE(macro_name, name, dst_type, src_type) \
+  void macro_name(dst_type dst, dst_type src1, src_type src2) {        \
+    if (CpuFeatures::IsSupported(AVX)) {                               \
+      CpuFeatureScope scope(this, AVX);                                \
+      v##name(dst, src1, src2);                                        \
+    } else {                                                           \
+      DCHECK_EQ(dst, src1);                                            \
+      name(dst, src2);                                                 \
+    }                                                                  \
+  }
+#define AVX_PACKED_OP3(macro_name, name)                               \
+  AVX_PACKED_OP3_WITH_TYPE(macro_name, name, XMMRegister, XMMRegister) \
+  AVX_PACKED_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)
+
+AVX_PACKED_OP3(Addpd, addpd)
+AVX_PACKED_OP3(Subpd, subpd)
+AVX_PACKED_OP3(Mulpd, mulpd)
+AVX_PACKED_OP3(Divpd, divpd)
+#undef AVX_PACKED_OP3
+#undef AVX_PACKED_OP3_WITH_TYPE
 
 // Non-SSE2 instructions.
 #define AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, dst_type, src_type, \
                                 sse_scope)                            \
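A note on what the wrapper generates: below is a minimal hand expansion of AVX_PACKED_OP3(Addpd, addpd) for the XMMRegister/XMMRegister overload, assuming nothing beyond what the macro body above shows. When AVX is available it emits the non-destructive three-operand vaddpd; otherwise it falls back to SSE2 addpd, which overwrites its first operand, hence the dst == src1 requirement in the comment.

// Illustrative expansion only, not part of the commit:
void Addpd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vaddpd(dst, src1, src2);  // AVX: dst = src1 + src2, any registers
  } else {
    DCHECK_EQ(dst, src1);  // SSE2 addpd is destructive: dst = dst + src2
    addpd(dst, src2);
  }
}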

src/compiler/backend/ia32/code-generator-ia32.cc

@@ -1904,6 +1904,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ Sqrtpd(i.OutputSimd128Register(), i.InputOperand(0));
       break;
     }
+    case kIA32F64x2Add: {
+      __ Addpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Sub: {
+      __ Subpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Mul: {
+      __ Mulpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Div: {
+      __ Divpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputOperand(1));
+      break;
+    }
     case kSSEF32x4Splat: {
       DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       XMMRegister dst = i.OutputSimd128Register();
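Each new case follows the same pattern: output register, first input register, and a second input that may be a register or a memory operand. Semantically these are lane-wise operations on two 64-bit float lanes. A small host-side sketch of the addpd behavior using SSE2 intrinsics (illustrative only, not V8 code):

#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128d a = _mm_set_pd(2.0, 1.0);    // lanes {1.0, 2.0}; _mm_set_pd takes (high, low)
  __m128d b = _mm_set_pd(40.0, 30.0);  // lanes {30.0, 40.0}
  __m128d sum = _mm_add_pd(a, b);      // compiles to addpd (or vaddpd under AVX)
  double lanes[2];
  _mm_storeu_pd(lanes, sum);
  std::printf("%f %f\n", lanes[0], lanes[1]);  // prints 31.000000 42.000000
  return 0;
}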

src/compiler/backend/ia32/instruction-codes-ia32.h

@@ -123,6 +123,10 @@ namespace compiler {
   V(SSEF64x2ReplaceLane)    \
   V(AVXF64x2ReplaceLane)    \
   V(IA32F64x2Sqrt)          \
+  V(IA32F64x2Add)           \
+  V(IA32F64x2Sub)           \
+  V(IA32F64x2Mul)           \
+  V(IA32F64x2Div)           \
   V(SSEF32x4Splat)          \
   V(AVXF32x4Splat)          \
   V(SSEF32x4ExtractLane)    \

src/compiler/backend/ia32/instruction-scheduler-ia32.cc

@@ -104,6 +104,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kSSEF64x2ReplaceLane:
     case kAVXF64x2ReplaceLane:
     case kIA32F64x2Sqrt:
+    case kIA32F64x2Add:
+    case kIA32F64x2Sub:
+    case kIA32F64x2Mul:
+    case kIA32F64x2Div:
     case kSSEF32x4Splat:
     case kAVXF32x4Splat:
     case kSSEF32x4ExtractLane:

src/compiler/backend/ia32/instruction-selector-ia32.cc

@@ -860,7 +860,11 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
   V(Float32Mul, kAVXFloat32Mul, kSSEFloat32Mul) \
   V(Float64Mul, kAVXFloat64Mul, kSSEFloat64Mul) \
   V(Float32Div, kAVXFloat32Div, kSSEFloat32Div) \
-  V(Float64Div, kAVXFloat64Div, kSSEFloat64Div)
+  V(Float64Div, kAVXFloat64Div, kSSEFloat64Div) \
+  V(F64x2Add, kIA32F64x2Add, kIA32F64x2Add)     \
+  V(F64x2Sub, kIA32F64x2Sub, kIA32F64x2Sub)     \
+  V(F64x2Mul, kIA32F64x2Mul, kIA32F64x2Mul)     \
+  V(F64x2Div, kIA32F64x2Div, kIA32F64x2Div)
 
 #define FLOAT_UNOP_LIST(V)                      \
   V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \
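Adding the four F64x2 entries to FLOAT_BINOP_LIST is what wires up instruction selection: the list is expanded into Visit* methods elsewhere in this file. A sketch of the consuming macro, assuming the usual V8 pattern (helper name assumed, not shown in this diff):

// Assumed consumer of FLOAT_BINOP_LIST (sketch):
#define RRO_FLOAT_OP(Name, avx, sse)                  \
  void InstructionSelector::Visit##Name(Node* node) { \
    VisitRROFloat(this, node, avx, sse);              \
  }
FLOAT_BINOP_LIST(RRO_FLOAT_OP)
#undef RRO_FLOAT_OP

Note that unlike the scalar entries, the F64x2 rows pass the same kIA32F64x2* opcode in both the AVX and SSE slots: the AVX-vs-SSE choice is deferred to code generation through the Addpd-style wrappers added above.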

src/compiler/backend/instruction-selector.cc

@@ -2628,11 +2628,11 @@ void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_IA32
 void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); }
+#endif  // !V8_TARGET_ARCH_IA32
 void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
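The net effect of this hunk is subtle but mechanical: the #endif moves down four lines, so the Add/Sub/Mul/Div stubs are now compiled only for targets other than ia32, which provides real lowerings as of this change. Roughly (sketch, surrounding guards elided):

// Resulting structure (sketch):
#if !V8_TARGET_ARCH_IA32
// ... Abs/Neg/Sqrt stubs ...
void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); }
#endif  // !V8_TARGET_ARCH_IA32
// Min/Max/Eq and later stubs remain outside the ia32 guard.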

test/cctest/wasm/test-run-wasm-simd.cc

@@ -1289,7 +1289,6 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Sqrt) {
   RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Sqrt, Sqrt);
 }
 
-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 void RunF64x2BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                        WasmOpcode opcode, DoubleBinOp expected_op) {
   WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);
@@ -1353,6 +1352,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
   RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div);
 }
 
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                            WasmOpcode opcode, DoubleCompareOp expected_op) {
   WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);
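With the architecture guard relaxed, RunF64x2BinOpTest and the F64x2 Add/Sub/Mul/Div tests now also build and run on ia32. The harness compares each output lane against a scalar DoubleBinOp reference; presumably the reference helpers look like this (sketch; the real definitions live elsewhere in test-run-wasm-simd.cc):

// Scalar reference ops matching the DoubleBinOp signature (sketch):
double Add(double a, double b) { return a + b; }
double Sub(double a, double b) { return a - b; }
double Mul(double a, double b) { return a * b; }
double Div(double a, double b) { return a / b; }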