[wasm] Add F32x4{Abs, Neg, AddHoriz}

- Remove redundant instruction from I16x8Splat
- Force F32x4Splat to use movss, as using the MacroAssembler can mix SSE/AVX
  instructions
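
For context: the capitalized MacroAssembler helpers pick the VEX-encoded AVX form of an instruction when AVX is available, while neighboring raw-SSE emissions (such as the shufps that follows the splat) stay legacy-encoded, and mixing the two encodings can incur SSE/AVX transition penalties. A minimal sketch of that dispatch pattern (simplified, not the literal V8 helper):

void MacroAssembler::Movss(XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmovss(dst, dst, src);  // VEX-encoded AVX form
  } else {
    movss(dst, src);  // legacy SSE encoding
  }
}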

Bug: v8:6020
Change-Id: I781c22adecf892a79b6a38c3d83fc4022f9067de
Reviewed-on: https://chromium-review.googlesource.com/898429
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#51123}
Authored by Deepti Gandluri on 2018-02-01 15:36:59 -08:00; committed by Commit Bot
commit 3363e51958 (parent cf9b487355)
6 changed files with 64 additions and 35 deletions

src/compiler/instruction-selector.cc

@@ -2131,33 +2131,15 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF32x4Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Neg(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
void InstructionSelector::VisitF32x4RecipApprox(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
// && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
@@ -2170,7 +2152,17 @@ void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitI32x4SConvertI16x8Low(Node* node) {
UNIMPLEMENTED();
}

src/compiler/x64/code-generator-x64.cc

@@ -2066,9 +2066,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64F32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsFPRegister()) {
- __ Movss(dst, i.InputDoubleRegister(0));
+ __ movss(dst, i.InputDoubleRegister(0));
} else {
- __ Movss(dst, i.InputOperand(0));
+ __ movss(dst, i.InputOperand(0));
}
__ shufps(dst, dst, 0x0);
break;
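
As scalar semantics, the splat just replicates the input into all four lanes: movss loads lane 0, and shufps with imm8 0x0 broadcasts it. A lane-level sketch (illustrative C++, not V8 code):

#include <array>
// Model of kX64F32x4Splat.
std::array<float, 4> F32x4Splat(float x) {
  return {x, x, x, x};  // shufps(dst, dst, 0x0) copies lane 0 everywhere
}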
@@ -2087,6 +2087,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), select);
break;
}
case kX64F32x4Abs: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
// Build the 0x7FFFFFFF mask in the scratch register: pcmpeqd sets all
// bits, then psrld by 1 clears each lane's sign bit.
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psrld(kScratchDoubleReg, 1);
__ andps(dst, kScratchDoubleReg);
} else {
// Build the mask directly in dst, then AND it with the source.
__ pcmpeqd(dst, dst);
__ psrld(dst, 1);
__ andps(dst, src);
}
break;
}
case kX64F32x4Neg: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
// Build the 0x80000000 mask: pcmpeqd sets all bits, then pslld by 31
// keeps only each lane's sign bit. XOR flips the signs.
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pslld(kScratchDoubleReg, 31);
__ xorps(dst, kScratchDoubleReg);
} else {
__ pcmpeqd(dst, dst);
__ pslld(dst, 31);
__ xorps(dst, src);
}
break;
}
case kX64F32x4RecipApprox: {
__ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
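
The Abs and Neg cases above synthesize their masks in registers instead of loading constants: pcmpeqd of a register with itself yields all-ones, psrld by 1 turns that into 0x7FFFFFFF per lane, and pslld by 31 into 0x80000000. A scalar model of both bit tricks (a sketch; the function names are illustrative, not V8 code):

#include <cstdint>
#include <cstring>
float F32Abs(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits &= 0x7FFFFFFFu;  // clear the sign bit, as andps does per lane
  std::memcpy(&x, &bits, sizeof x);
  return x;
}
float F32Neg(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits ^= 0x80000000u;  // flip the sign bit, as xorps does per lane
  std::memcpy(&x, &bits, sizeof x);
  return x;
}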
@@ -2100,6 +2128,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4AddHoriz: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Sub: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
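
haddps computes pairwise sums of adjacent lanes, with the destination operand's pairs landing in the low half of the result, which is why the instruction can work in place on the first input. A lane-level sketch (illustrative C++):

#include <array>
std::array<float, 4> F32x4AddHoriz(const std::array<float, 4>& a,
                                   const std::array<float, 4>& b) {
  // haddps(a, b): a's adjacent pairs, then b's adjacent pairs.
  return {a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]};
}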
@@ -2273,7 +2306,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register();
__ movd(dst, i.InputRegister(0));
__ pshuflw(dst, dst, 0x0);
- __ pshufhw(dst, dst, 0x0);
__ pshufd(dst, dst, 0x0);
break;
}
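
The dropped pshufhw was redundant: after pshuflw with imm8 0x0, words 0-3 all hold the splat value, so dword 0 already contains two copies, and pshufd 0x0 then broadcasts that dword across the whole register, covering words 4-7 as well. A lane-level sketch (illustrative C++):

#include <array>
#include <cstdint>
std::array<uint16_t, 8> I16x8Splat(uint16_t x) {
  std::array<uint16_t, 8> v{};
  v[0] = x;                                 // movd: value in word 0
  for (int i = 1; i < 4; ++i) v[i] = v[0];  // pshuflw 0x0: words 0-3
  for (int i = 1; i < 4; ++i) {             // pshufd 0x0: dword 0 everywhere
    v[2 * i] = v[0];
    v[2 * i + 1] = v[1];
  }
  return v;
}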

src/compiler/x64/instruction-codes-x64.h

@@ -149,9 +149,12 @@ namespace compiler {
V(X64F32x4Splat) \
V(X64F32x4ExtractLane) \
V(X64F32x4ReplaceLane) \
V(X64F32x4Abs) \
V(X64F32x4Neg) \
V(X64F32x4RecipApprox) \
V(X64F32x4RecipSqrtApprox) \
V(X64F32x4Add) \
V(X64F32x4AddHoriz) \
V(X64F32x4Sub) \
V(X64F32x4Mul) \
V(X64F32x4Min) \

src/compiler/x64/instruction-scheduler-x64.cc

@@ -128,7 +128,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4ReplaceLane:
case kX64F32x4RecipApprox:
case kX64F32x4RecipSqrtApprox:
case kX64F32x4Abs:
case kX64F32x4Neg:
case kX64F32x4Add:
case kX64F32x4AddHoriz:
case kX64F32x4Sub:
case kX64F32x4Mul:
case kX64F32x4Min:

src/compiler/x64/instruction-selector-x64.cc

@@ -2374,6 +2374,7 @@ VISIT_ATOMIC_BINOP(Xor)
#define SIMD_BINOP_LIST(V) \
V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Min) \
@@ -2435,6 +2436,8 @@ VISIT_ATOMIC_BINOP(Xor)
V(S128Xor)
#define SIMD_UNOP_LIST(V) \
V(F32x4Abs) \
V(F32x4Neg) \
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(I32x4Neg) \

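Adding an opcode to these lists is enough to generate its visitor: the lists are consumed by X-macro expansion elsewhere in this file. A sketch of the unop expansion (simplified; the exact operand constraints in the real macro may differ):

#define VISIT_SIMD_UNOP(Opcode)                         \
  void InstructionSelector::Visit##Opcode(Node* node) { \
    X64OperandGenerator g(this);                        \
    Emit(kX64##Opcode, g.DefineAsRegister(node),        \
         g.UseRegister(node->InputAt(0)));              \
  }
SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
#undef VISIT_SIMD_UNOP
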
test/cctest/wasm/test-run-wasm-simd.cc

@@ -494,16 +494,12 @@ void RunF32x4UnOpTest(LowerSimd lower_simd, WasmOpcode simd_op,
}
}
- #if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
- V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(F32x4Abs) {
RunF32x4UnOpTest(lower_simd, kExprF32x4Abs, std::abs);
}
WASM_SIMD_TEST(F32x4Neg) {
RunF32x4UnOpTest(lower_simd, kExprF32x4Neg, Negate);
}
- #endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
- // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_X64
@@ -1621,16 +1617,16 @@ WASM_SIMD_COMPILED_TEST(I16x8AddHoriz) {
RunBinaryLaneOpTest<int16_t>(lower_simd, kExprI16x8AddHoriz,
{{1, 5, 9, 13, 17, 21, 25, 29}});
}
+ WASM_SIMD_COMPILED_TEST(F32x4AddHoriz) {
+ RunBinaryLaneOpTest<float>(lower_simd, kExprF32x4AddHoriz,
+ {{1.0f, 5.0f, 9.0f, 13.0f}});
+ }
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
// V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
- WASM_SIMD_COMPILED_TEST(F32x4AddHoriz) {
- RunBinaryLaneOpTest<float>(lower_simd, kExprF32x4AddHoriz,
- {{1.0f, 5.0f, 9.0f, 13.0f}});
- }
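
The expected lanes follow from the inputs RunBinaryLaneOpTest appears to use, namely operands filled with consecutive lane indices: with a = {0, 1, 2, 3} and b = {4, 5, 6, 7}, horizontal add yields {0+1, 2+3, 4+5, 6+7} = {1, 5, 9, 13}, and the I16x8AddHoriz expectation above is the same pattern over sixteen consecutive inputs.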
// Test some regular shuffles that may have special handling on some targets.
// Test normal and unary versions (where the second operand isn't used).
WASM_SIMD_COMPILED_TEST(S32x4Dup) {