Revert "[wasm-simd] Implement remaining I8x16 SIMD ops on x64"
This reverts commit de88bfb270.
Reason for revert: breaks the MSVC build: https://ci.chromium.org/p/v8/builders/luci.v8.ci/V8%20Win64%20-%20msvc/5765
Original change's description:
> [wasm-simd] Implement remaining I8x16 SIMD ops on x64
>
> - Implementation for I8x16 Shifts, and Mul
> - Fix convert bug
> - Enable all tests except for shuffle tests
>
> Change-Id: Id1a469d2883c30ea782c51d21dc462d211f94420
> Reviewed-on: https://chromium-review.googlesource.com/c/1318609
> Reviewed-by: Bill Budge <bbudge@chromium.org>
> Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#57254}
TBR=bbudge@chromium.org,gdeepti@chromium.org
Change-Id: I09efd8002e27f457e89250336e6c3a12d8d9682c
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/c/1318097
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#57257}
parent 64c668a224
commit bdb9e7a517
@@ -2481,6 +2481,14 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {

#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
    !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI8x16Shl(Node* node) { UNIMPLEMENTED(); }

void InstructionSelector::VisitI8x16ShrS(Node* node) { UNIMPLEMENTED(); }

void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); }

void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }

void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
        // && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
@@ -2683,37 +2683,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      }
      break;
    }
    case kX64I8x16Shl: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      int8_t shift = i.InputInt8(1) & 0x7;
      if (shift < 4) {
        // For small shifts, doubling is faster.
        for (int i = 0; i < shift; ++i) {
          __ paddb(dst, dst);
        }
      } else {
        // Mask off the unwanted bits before word-shifting.
        __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
        __ psrlw(kScratchDoubleReg, 8 + shift);
        __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
        __ pand(dst, kScratchDoubleReg);
        __ psllw(dst, shift);
      }
      break;
    }
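SSE2 has no per-byte shift instruction, which is what both branches above work around: small shifts are done by repeated paddb doubling, larger ones by masking each byte and then using the 16-bit psllw. Below is a scalar model of the wide-shift branch, with a hypothetical helper name; it is an illustrative sketch, not V8 code.

#include <cassert>
#include <cstdint>

// Models one byte lane of the pcmpeqw/psrlw/packuswb/pand/psllw sequence.
// The mask drops the bits that a 16-bit shift would otherwise leak into
// the neighboring byte.
uint8_t I8ShlViaWordShift(uint8_t byte, int shift) {
  assert(shift >= 0 && shift <= 7);
  // pcmpeqw + psrlw(8 + shift) + packuswb leaves 0xFF >> shift in each byte.
  uint8_t mask = 0xFF >> shift;
  uint16_t word = static_cast<uint16_t>(byte & mask);  // pand
  word = static_cast<uint16_t>(word << shift);         // psllw
  return static_cast<uint8_t>(word);
}

int main() {
  for (int v = 0; v < 256; ++v) {
    for (int s = 0; s < 8; ++s) {
      assert(I8ShlViaWordShift(static_cast<uint8_t>(v), s) ==
             static_cast<uint8_t>(v << s));
    }
  }
  return 0;
}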
    case kX64I8x16ShrS: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t shift = i.InputInt8(1) & 0x7;
      // Unpack the bytes into words, do arithmetic shifts, and repack.
      __ punpckhbw(kScratchDoubleReg, src);
      __ punpcklbw(dst, src);
      __ psraw(kScratchDoubleReg, 8 + shift);
      __ psraw(dst, 8 + shift);
      __ packsswb(dst, kScratchDoubleReg);
      break;
    }
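There is no byte-wide arithmetic shift either, so this case widens each byte to a word whose high byte is the source byte (interleaving the register with itself), shifts by 8 + shift, and repacks; packsswb saturates, but every shifted value already fits in int8, so nothing is clipped. A one-lane scalar model follows, with a hypothetical name; a sketch rather than the actual codegen.

#include <cassert>
#include <cstdint>

// Models one lane of the punpcklbw/psraw/packsswb path, assuming the usual
// two's-complement narrowing and arithmetic right shift of signed values.
int8_t I8ShrSViaWordShift(int8_t byte, int shift) {
  assert(shift >= 0 && shift <= 7);
  uint8_t b = static_cast<uint8_t>(byte);
  int16_t word = static_cast<int16_t>((b << 8) | b);  // punpcklbw with self
  word = static_cast<int16_t>(word >> (8 + shift));   // psraw(8 + shift)
  // packsswb clamps to [-128, 127]; b >> shift already fits, so it is
  // lossless here.
  return static_cast<int8_t>(word);
}

int main() {
  for (int v = -128; v < 128; ++v) {
    for (int s = 0; s < 8; ++s) {
      assert(I8ShrSViaWordShift(static_cast<int8_t>(v), s) ==
             static_cast<int8_t>(v >> s));
    }
  }
  return 0;
}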
    case kX64I8x16Add: {
      __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
@@ -2730,39 +2699,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16Mul: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      XMMRegister right = i.InputSimd128Register(1);
      XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
      // I16x8 view of I8x16
      // left = AAaa AAaa ... AAaa AAaa
      // right= BBbb BBbb ... BBbb BBbb
      // t = 00AA 00AA ... 00AA 00AA
      // s = 00BB 00BB ... 00BB 00BB
      __ movaps(tmp, dst);
      __ movaps(kScratchDoubleReg, right);
      __ psrlw(tmp, 8);
      __ psrlw(kScratchDoubleReg, 8);
      // dst = left * 256
      __ psllw(dst, 8);
      // t = I16x8Mul(t, s)
      //   => __PP __PP ... __PP __PP
      __ pmullw(tmp, kScratchDoubleReg);
      // dst = I16x8Mul(left * 256, right)
      //   => pp__ pp__ ... pp__ pp__
      __ pmullw(dst, right);
      // t = I16x8Shl(t, 8)
      //   => PP00 PP00 ... PP00 PP00
      __ psllw(tmp, 8);
      // dst = I16x8Shr(dst, 8)
      //   => 00pp 00pp ... 00pp 00pp
      __ psrlw(dst, 8);
      // dst = I16x8Or(dst, t)
      //   => PPpp PPpp ... PPpp PPpp
      __ por(dst, tmp);
      break;
    }
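Likewise there is no pmullb: the sequence multiplies the high and low bytes of each 16-bit lane separately with pmullw and stitches the truncated products back together, pre-shifting the left operand so the low-byte product cannot pollute the high half. A scalar model of a single 16-bit lane, following the register comments above (hypothetical helper, illustration only):

#include <cassert>
#include <cstdint>

// One lane holds two bytes: left = (A << 8) | a, right = (B << 8) | b.
// Returns (((A * B) & 0xFF) << 8) | ((a * b) & 0xFF), mirroring the
// instruction sequence step by step. All arithmetic is mod 2^16, as with
// pmullw.
uint16_t I8x2MulViaWordMul(uint16_t left, uint16_t right) {
  uint16_t t = left >> 8;                           // movaps + psrlw(tmp, 8)
  uint16_t s = right >> 8;                          // movaps + psrlw(scratch, 8)
  uint16_t dst = static_cast<uint16_t>(left << 8);  // psllw(dst, 8)
  t = static_cast<uint16_t>(t * s);                 // pmullw(tmp, scratch)
  dst = static_cast<uint16_t>(dst * right);         // pmullw(dst, right)
  t = static_cast<uint16_t>(t << 8);                // psllw(tmp, 8)
  dst = dst >> 8;                                   // psrlw(dst, 8)
  return static_cast<uint16_t>(dst | t);            // por(dst, tmp)
}

int main() {
  for (int a = 0; a < 256; ++a) {
    for (int b = 0; b < 256; ++b) {
      uint16_t lane = static_cast<uint16_t>((a << 8) | b);
      uint16_t product = I8x2MulViaWordMul(lane, lane);
      assert((product >> 8) == ((a * a) & 0xFF));
      assert((product & 0xFF) == ((b * b) & 0xFF));
    }
  }
  return 0;
}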
    case kX64I8x16MinS: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
@@ -2807,18 +2743,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ packuswb(dst, kScratchDoubleReg);
      break;
    }
    case kX64I8x16ShrU: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t shift = i.InputInt8(1) & 0x7;
      // Unpack the bytes into words, do logical shifts, and repack.
      __ punpckhbw(kScratchDoubleReg, src);
      __ punpcklbw(dst, src);
      __ psrlw(kScratchDoubleReg, 8 + shift);
      __ psrlw(dst, 8 + shift);
      __ packuswb(dst, kScratchDoubleReg);
      break;
    }
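The unsigned shift mirrors kX64I8x16ShrS with logical shifts; since each shifted word is at most 255, packuswb repacks without saturating. A one-lane scalar model (again a hedged sketch with a made-up name, not the actual code):

#include <cassert>
#include <cstdint>

// Models one lane of the punpcklbw/psrlw/packuswb path.
uint8_t I8ShrUViaWordShift(uint8_t b, int shift) {
  assert(shift >= 0 && shift <= 7);
  uint16_t word = static_cast<uint16_t>((b << 8) | b);  // punpcklbw with self
  word = static_cast<uint16_t>(word >> (8 + shift));    // psrlw(8 + shift)
  return static_cast<uint8_t>(word);                    // packuswb (lossless)
}

int main() {
  for (int v = 0; v < 256; ++v) {
    for (int s = 0; s < 8; ++s) {
      assert(I8ShrUViaWordShift(static_cast<uint8_t>(v), s) ==
             static_cast<uint8_t>(static_cast<uint8_t>(v) >> s));
    }
  }
  return 0;
}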
    case kX64I8x16AddSaturateU: {
      __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;

@@ -230,13 +230,10 @@ namespace compiler {
  V(X64I8x16ReplaceLane)   \
  V(X64I8x16SConvertI16x8) \
  V(X64I8x16Neg)           \
  V(X64I8x16Shl)           \
  V(X64I8x16ShrS)          \
  V(X64I8x16Add)           \
  V(X64I8x16AddSaturateS)  \
  V(X64I8x16Sub)           \
  V(X64I8x16SubSaturateS)  \
  V(X64I8x16Mul)           \
  V(X64I8x16MinS)          \
  V(X64I8x16MaxS)          \
  V(X64I8x16Eq)            \
@@ -246,17 +243,16 @@ namespace compiler {
  V(X64I8x16UConvertI16x8) \
  V(X64I8x16AddSaturateU)  \
  V(X64I8x16SubSaturateU)  \
  V(X64I8x16ShrU)          \
  V(X64I8x16MinU)          \
  V(X64I8x16MaxU)          \
  V(X64I8x16GtU)           \
  V(X64I8x16GeU)           \
  V(X64S128Zero)           \
  V(X64S128Not)            \
  V(X64S128And)            \
  V(X64S128Or)             \
  V(X64S128Xor)            \
  V(X64S128Not)            \
  V(X64S128Select)         \
  V(X64S128Zero)           \
  V(X64S1x4AnyTrue)        \
  V(X64S1x4AllTrue)        \
  V(X64S1x8AnyTrue)        \
@@ -207,13 +207,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kX64I8x16ReplaceLane:
    case kX64I8x16SConvertI16x8:
    case kX64I8x16Neg:
    case kX64I8x16Shl:
    case kX64I8x16ShrS:
    case kX64I8x16Add:
    case kX64I8x16AddSaturateS:
    case kX64I8x16Sub:
    case kX64I8x16SubSaturateS:
    case kX64I8x16Mul:
    case kX64I8x16MinS:
    case kX64I8x16MaxS:
    case kX64I8x16Eq:
@@ -223,7 +220,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kX64I8x16UConvertI16x8:
    case kX64I8x16AddSaturateU:
    case kX64I8x16SubSaturateU:
    case kX64I8x16ShrU:
    case kX64I8x16MinU:
    case kX64I8x16MaxU:
    case kX64I8x16GtU:
@@ -2655,10 +2655,7 @@ VISIT_ATOMIC_BINOP(Xor)
  V(I32x4ShrU) \
  V(I16x8Shl)  \
  V(I16x8ShrS) \
  V(I16x8ShrU) \
  V(I8x16Shl)  \
  V(I8x16ShrS) \
  V(I8x16ShrU)
  V(I16x8ShrU)

#define SIMD_ANYTRUE_LIST(V) \
  V(S1x4AnyTrue)             \
@@ -2780,9 +2777,8 @@ void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {

void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand temps[] = {g.TempSimd128Register()};
  Emit(kX64I32x4UConvertF32x4, g.DefineSameAsFirst(node),
       g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
       g.UseRegister(node->InputAt(0)));
}

void InstructionSelector::VisitI16x8UConvertI32x4(Node* node) {
@@ -2797,14 +2793,6 @@ void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
}

void InstructionSelector::VisitI8x16Mul(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand temps[] = {g.TempSimd128Register()};
  Emit(kX64I8x16Mul, g.DefineSameAsFirst(node),
       g.UseUniqueRegister(node->InputAt(0)),
       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
}

void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
  UNREACHABLE();
}
@@ -438,6 +438,8 @@ WASM_SIMD_TEST(F32x4ReplaceLane) {
  CHECK_EQ(1, r.Call(3.14159f, -1.5f));
}

#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
// Tests both signed and unsigned conversion.
WASM_SIMD_TEST(F32x4ConvertI32x4) {
  WasmRunner<int32_t, int32_t, float, float> r(execution_tier, lower_simd);
@@ -461,6 +463,8 @@ WASM_SIMD_TEST(F32x4ConvertI32x4) {
                    static_cast<float>(static_cast<uint32_t>(*i))));
  }
}
#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32

void RunF32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                      WasmOpcode simd_op, FloatUnOp expected_op,
@@ -815,6 +819,9 @@ WASM_SIMD_TEST(I8x16ReplaceLane) {
  CHECK_EQ(1, r.Call(1, 2));
}

#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32

int32_t ConvertToInt(double val, bool unsigned_integer) {
  if (std::isnan(val)) return 0;
  if (unsigned_integer) {
@@ -893,6 +900,8 @@ WASM_SIMD_TEST(I32x4ConvertI16x8) {
    CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned, 0));
  }
}
#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32

void RunI32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                      WasmOpcode simd_op, Int32UnOp expected_op) {
@@ -1533,9 +1542,13 @@ WASM_SIMD_TEST(I8x16LeU) {
                    UnsignedLessEqual);
}

#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(I8x16Mul) {
  RunI8x16BinOpTest(execution_tier, lower_simd, kExprI8x16Mul, Mul);
}
#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32

void RunI8x16ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                         WasmOpcode simd_op, Int8ShiftOp expected_op) {
@@ -1553,6 +1566,8 @@ void RunI8x16ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
  }
}

#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(I8x16Shl) {
  RunI8x16ShiftOpTest(execution_tier, lower_simd, kExprI8x16Shl,
                      LogicalShiftLeft);
@@ -1567,6 +1582,8 @@ WASM_SIMD_TEST(I8x16ShrU) {
  RunI8x16ShiftOpTest(execution_tier, lower_simd, kExprI8x16ShrU,
                      LogicalShiftRight);
}
#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
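RunI8x16ShiftOpTest runs each input through the compiled shift and checks every lane against a scalar Int8ShiftOp reference such as the LogicalShiftLeft passed above. The following is only an assumed sketch of the shape of those references (names prefixed to mark them as hypothetical, not the actual cctest helpers, which are defined elsewhere in test-run-wasm-simd.cc):

#include <cstdint>

// Assumed scalar references in the spirit of LogicalShiftLeft,
// ArithmeticShiftRight, and LogicalShiftRight.
int8_t RefLogicalShiftLeft(int8_t a, int shift) {
  return static_cast<int8_t>(static_cast<uint8_t>(a) << shift);
}
int8_t RefArithmeticShiftRight(int8_t a, int shift) {
  return static_cast<int8_t>(a >> shift);  // arithmetic on two's complement
}
int8_t RefLogicalShiftRight(int8_t a, int shift) {
  return static_cast<int8_t>(static_cast<uint8_t>(a) >> shift);
}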
// Test Select by making a mask where the 0th and 3rd lanes are true and the
// rest false, and comparing for non-equality with zero to convert to a boolean