Revert "[wasm-simd][x64] Prototype i32x4.dot_i16x8_s"

This reverts commit 3692bef9f9.

Reason for revert: https://ci.chromium.org/p/v8/builders/ci/V8%20Linux64%20UBSan/11514?

Original change's description:
> [wasm-simd][x64] Prototype i32x4.dot_i16x8_s
>
> This implements I32x4DotI16x8S for x64 and interpreter.
>
> Bug: v8:10583
> Change-Id: I404ac68c19c1686a93f29c3f4fc2d661c9558c67
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2229056
> Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#68244}

TBR=gdeepti@chromium.org,tebbi@chromium.org,zhin@chromium.org

Change-Id: I8760d480a783ba6c8a2ec2eaeb0131c7d4e11159
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: v8:10583
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2236961
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68245}
2020-06-09 04:00:22 +00:00 · 2020-06-09 04:00:22 +00:00 · 00fb782b16
commit 00fb782b16
parent 3692bef9f9
16 changed files with 1 addition and 70 deletions
--- a/src/codegen/x64/macro-assembler-x64.h
+++ b/src/codegen/x64/macro-assembler-x64.h
@@ -202,7 +202,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
  AVX_OP(Psrlw, psrlw)
  AVX_OP(Psrld, psrld)
  AVX_OP(Psrlq, psrlq)
-  AVX_OP(Pmaddwd, pmaddwd)
  AVX_OP(Paddb, paddb)
  AVX_OP(Paddw, paddw)
  AVX_OP(Paddd, paddd)
--- a/src/codegen/x64/sse-instr.h
+++ b/src/codegen/x64/sse-instr.h
@@ -57,7 +57,6 @@
  V(packssdw, 66, 0F, 6B)        \
  V(punpcklqdq, 66, 0F, 6C)      \
  V(punpckhqdq, 66, 0F, 6D)      \
-  V(pmaddwd, 66, 0F, F5)         \
  V(paddb, 66, 0F, FC)           \
  V(paddw, 66, 0F, FD)           \
  V(paddd, 66, 0F, FE)           \
--- a/src/compiler/backend/instruction-selector.cc
+++ b/src/compiler/backend/instruction-selector.cc
@@ -2057,8 +2057,6 @@ void InstructionSelector::VisitNode(Node* node) {
      return MarkAsSimd128(node), VisitI32x4Abs(node);
    case IrOpcode::kI32x4BitMask:
      return MarkAsWord32(node), VisitI32x4BitMask(node);
-    case IrOpcode::kI32x4DotI16x8S:
-      return MarkAsSimd128(node), VisitI32x4DotI16x8S(node);
    case IrOpcode::kI16x8Splat:
      return MarkAsSimd128(node), VisitI16x8Splat(node);
    case IrOpcode::kI16x8ExtractLaneU:
@@ -2697,11 +2695,6 @@ void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_X64

-#if !V8_TARGET_ARCH_X64
-// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
-void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_X64
-
 void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }

 void InstructionSelector::VisitParameter(Node* node) {
--- a/src/compiler/backend/x64/code-generator-x64.cc
+++ b/src/compiler/backend/x64/code-generator-x64.cc
@@ -3187,10 +3187,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
      break;
    }
-    case kX64I32x4DotI16x8S: {
-      __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(1));
-      break;
-    }
    case kX64S128Zero: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Xorps(dst, dst);
--- a/src/compiler/backend/x64/instruction-codes-x64.h
+++ b/src/compiler/backend/x64/instruction-codes-x64.h
@@ -250,7 +250,6 @@ namespace compiler {
  V(X64I32x4GeU)                          \
  V(X64I32x4Abs)                          \
  V(X64I32x4BitMask)                      \
-  V(X64I32x4DotI16x8S)                    \
  V(X64I16x8Splat)                        \
  V(X64I16x8ExtractLaneU)                 \
  V(X64I16x8ExtractLaneS)                 \
--- a/src/compiler/backend/x64/instruction-scheduler-x64.cc
+++ b/src/compiler/backend/x64/instruction-scheduler-x64.cc
@@ -222,7 +222,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kX64I32x4GeU:
    case kX64I32x4Abs:
    case kX64I32x4BitMask:
-    case kX64I32x4DotI16x8S:
    case kX64I16x8Splat:
    case kX64I16x8ExtractLaneU:
    case kX64I16x8ExtractLaneS:
--- a/src/compiler/backend/x64/instruction-selector-x64.cc
+++ b/src/compiler/backend/x64/instruction-selector-x64.cc
@@ -2700,7 +2700,6 @@ VISIT_ATOMIC_BINOP(Xor)
  V(I32x4MinU)             \
  V(I32x4MaxU)             \
  V(I32x4GeU)              \
-  V(I32x4DotI16x8S)        \
  V(I16x8SConvertI32x4)    \
  V(I16x8Add)              \
  V(I16x8AddSaturateS)     \
--- a/src/compiler/machine-operator.cc
+++ b/src/compiler/machine-operator.cc
@@ -416,7 +416,6 @@ ShiftKind ShiftKindOf(Operator const* op) {
  V(I32x4GeU, Operator::kNoProperties, 2, 0, 1)                            \
  V(I32x4Abs, Operator::kNoProperties, 1, 0, 1)                            \
  V(I32x4BitMask, Operator::kNoProperties, 1, 0, 1)                        \
-  V(I32x4DotI16x8S, Operator::kCommutative, 2, 0, 1)                       \
  V(I16x8Splat, Operator::kNoProperties, 1, 0, 1)                          \
  V(I16x8SConvertI8x16Low, Operator::kNoProperties, 1, 0, 1)               \
  V(I16x8SConvertI8x16High, Operator::kNoProperties, 1, 0, 1)              \
--- a/src/compiler/machine-operator.h
+++ b/src/compiler/machine-operator.h
@@ -664,7 +664,6 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
  const Operator* I32x4GeU();
  const Operator* I32x4Abs();
  const Operator* I32x4BitMask();
-  const Operator* I32x4DotI16x8S();

  const Operator* I16x8Splat();
  const Operator* I16x8ExtractLaneU(int32_t);
--- a/src/compiler/opcodes.h
+++ b/src/compiler/opcodes.h
@@ -855,7 +855,6 @@
  V(I32x4GeU)                   \
  V(I32x4Abs)                   \
  V(I32x4BitMask)               \
-  V(I32x4DotI16x8S)             \
  V(I16x8Splat)                 \
  V(I16x8ExtractLaneU)          \
  V(I16x8ExtractLaneS)          \
--- a/src/compiler/wasm-compiler.cc
+++ b/src/compiler/wasm-compiler.cc
@@ -4425,9 +4425,6 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
      return graph()->NewNode(mcgraph()->machine()->I32x4Abs(), inputs[0]);
    case wasm::kExprI32x4BitMask:
      return graph()->NewNode(mcgraph()->machine()->I32x4BitMask(), inputs[0]);
-    case wasm::kExprI32x4DotI16x8S:
-      return graph()->NewNode(mcgraph()->machine()->I32x4DotI16x8S(), inputs[0],
-                              inputs[1]);
    case wasm::kExprI16x8Splat:
      return graph()->NewNode(mcgraph()->machine()->I16x8Splat(), inputs[0]);
    case wasm::kExprI16x8SConvertI8x16Low:
--- a/src/diagnostics/x64/disasm-x64.cc
+++ b/src/diagnostics/x64/disasm-x64.cc
@@ -2133,8 +2133,6 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
          mnemonic = "psllq";
        } else if (opcode == 0xF4) {
          mnemonic = "pmuludq";
-        } else if (opcode == 0xF5) {
-          mnemonic = "pmaddwd";
        } else if (opcode == 0xF8) {
          mnemonic = "psubb";
        } else if (opcode == 0xF9) {
--- a/src/wasm/wasm-interpreter.cc
+++ b/src/wasm/wasm-interpreter.cc
@@ -2587,19 +2587,6 @@ class WasmInterpreterInternals {
        ADD_HORIZ_CASE(F32x4AddHoriz, f32x4, float4, 4)
        ADD_HORIZ_CASE(I16x8AddHoriz, i16x8, int8, 8)
 #undef ADD_HORIZ_CASE
-      case kExprI32x4DotI16x8S: {
-        int8 v2 = Pop().to_s128().to_i16x8();
-        int8 v1 = Pop().to_s128().to_i16x8();
-        int4 res;
-        for (size_t i = 0; i < 4; i++) {
-          int32_t lo = (v1.val[LANE(i * 2, v1)] * v2.val[LANE(i * 2, v2)]);
-          int32_t hi =
-              (v1.val[LANE(i * 2 + 1, v1)] * v2.val[LANE(i * 2 + 1, v2)]);
-          res.val[LANE(i, res)] = lo + hi;
-        }
-        Push(WasmValue(Simd128(res)));
-        return true;
-      }
      case kExprS8x16Swizzle: {
        int16 v2 = Pop().to_s128().to_i8x16();
        int16 v1 = Pop().to_s128().to_i8x16();
--- a/src/wasm/wasm-opcodes.cc
+++ b/src/wasm/wasm-opcodes.cc
@@ -335,8 +335,6 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
    CASE_F64x2_OP(Trunc, "trunc")
    CASE_F64x2_OP(NearestInt, "nearest")

-    CASE_I32x4_OP(DotI16x8S, "dot_i16x8_s")
-
    // Atomic operations.
    CASE_OP(AtomicNotify, "atomic.notify")
    CASE_INT_OP(AtomicWait, "atomic.wait")
--- a/src/wasm/wasm-opcodes.h
+++ b/src/wasm/wasm-opcodes.h
@@ -470,10 +470,9 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
  V(F64x2Qfms, 0xfdff, s_sss)           \
  V(I16x8AddHoriz, 0xfdaf, s_ss)        \
  V(I32x4AddHoriz, 0xfdb0, s_ss)        \
-  V(I32x4DotI16x8S, 0xfdba, s_ss)       \
  V(F32x4AddHoriz, 0xfdb2, s_ss)        \
  V(F32x4RecipApprox, 0xfdb3, s_s)      \
-  V(F32x4RecipSqrtApprox, 0xfdbc, s_s)  \
+  V(F32x4RecipSqrtApprox, 0xfdba, s_s)  \
  V(F32x4Pmin, 0xfdea, s_ss)            \
  V(F32x4Pmax, 0xfdeb, s_ss)            \
  V(F64x2Pmin, 0xfdf6, s_ss)            \
--- a/test/cctest/wasm/test-run-wasm-simd.cc
+++ b/test/cctest/wasm/test-run-wasm-simd.cc
@@ -2306,35 +2306,6 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
                              base::RoundingAverageUnsigned);
 }

-// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
-#if V8_TARGET_ARCH_X64
-WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
-  FLAG_SCOPE(wasm_simd_post_mvp);
-
-  WasmRunner<int32_t, int16_t, int16_t> r(execution_tier, lower_simd);
-  int32_t* g = r.builder().template AddGlobal<int32_t>(kWasmS128);
-  byte value1 = 0, value2 = 1;
-  byte temp1 = r.AllocateLocal(kWasmS128);
-  byte temp2 = r.AllocateLocal(kWasmS128);
-  BUILD(r, WASM_SET_LOCAL(temp1, WASM_SIMD_I16x8_SPLAT(WASM_GET_LOCAL(value1))),
-        WASM_SET_LOCAL(temp2, WASM_SIMD_I16x8_SPLAT(WASM_GET_LOCAL(value2))),
-        WASM_SET_GLOBAL(
-            0, WASM_SIMD_BINOP(kExprI32x4DotI16x8S, WASM_GET_LOCAL(temp1),
-                               WASM_GET_LOCAL(temp2))),
-        WASM_ONE);
-
-  for (int16_t x : compiler::ValueHelper::GetVector<int16_t>()) {
-    for (int16_t y : compiler::ValueHelper::GetVector<int16_t>()) {
-      r.Call(x, y);
-      int32_t expected = x * y * 2;
-      for (int i = 0; i < 4; i++) {
-        CHECK_EQ(expected, ReadLittleEndianValue<int32_t>(&g[i]));
-      }
-    }
-  }
-}
-#endif  // V8_TARGET_ARCH_X64
-
 void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                         WasmOpcode opcode, Int16ShiftOp expected_op) {
  // Intentionally shift by 16, should be no-op.