[wasm-simd][ia32] Implement saturating rounding multiply high

The implementation is the same as on x64. Disassembly of the new instruction, pmulhrsw, is already handled via the instruction macro list. Bug: v8:10971 Change-Id: I099c4f8c3da521006ef5e2b151626f25a5df1ed9 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2620898 Reviewed-by: Bill Budge <bbudge@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/master@{#72021}
2021-01-11 04:40:26 +00:00 · 2021-01-11 04:40:26 +00:00 · d5ba8283a2
commit d5ba8283a2
parent c5fc192efb
8 changed files with 24 additions and 4 deletions
--- a/src/codegen/ia32/macro-assembler-ia32.h
+++ b/src/codegen/ia32/macro-assembler-ia32.h
@ -527,6 +527,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
  AVX_OP3_XO_SSE4(Pmaxsd, pmaxsd)
  AVX_OP3_WITH_TYPE_SCOPE(Pmaddubsw, pmaddubsw, XMMRegister, XMMRegister, SSSE3)
  AVX_OP3_WITH_TYPE_SCOPE(Pmulhrsw, pmulhrsw, XMMRegister, XMMRegister, SSSE3)
 #undef AVX_OP3_XO_SSE4
 #undef AVX_OP3_WITH_TYPE_SCOPE
--- a/src/codegen/ia32/sse-instr.h
+++ b/src/codegen/ia32/sse-instr.h
@ -70,7 +70,8 @@
  V(pmaddubsw, 66, 0F, 38, 04)    \
  V(psignb, 66, 0F, 38, 08)       \
  V(psignw, 66, 0F, 38, 09)       \
-  V(psignd, 66, 0F, 38, 0A)
+  V(psignd, 66, 0F, 38, 0A)       \
  V(pmulhrsw, 66, 0F, 38, 0B)
 // SSSE3 instructions whose AVX version has two operands.
 #define SSSE3_UNOP_INSTRUCTION_LIST(V) \
--- a/src/compiler/backend/ia32/code-generator-ia32.cc
+++ b/src/compiler/backend/ia32/code-generator-ia32.cc
@ -2328,6 +2328,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ Pmaddubsw(dst, i.InputSimd128Register(0), kScratchDoubleReg);
      break;
    }
    case kIA32I16x8Q15MulRSatS: {
      // Emits code for i16x8.q15mulr_sat_s: a per-lane rounding "multiply
      // high" of two i16x8 vectors whose result must saturate. The multiply
      // itself is done by pmulhrsw; the instructions around it patch up the
      // one lane value that pmulhrsw gets wrong. kScratchDoubleReg is
      // clobbered.
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src0 = i.InputSimd128Register(0);
      XMMRegister src1 = i.InputSimd128Register(1);
      // k = i16x8.splat(0x8000)
      // Comparing the scratch register with itself sets every bit; shifting
      // each 16-bit lane left by 15 then leaves only the sign bit (0x8000).
      __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ Psllw(kScratchDoubleReg, kScratchDoubleReg, byte{15});
      // Per the x86 ISA, pmulhrsw wraps instead of saturating: the only
      // overflowing input pair, 0x8000 * 0x8000, yields 0x8000 rather than
      // 0x7FFF, and no other input pair can produce 0x8000.
      __ Pmulhrsw(dst, src0, src1);
      // Build a per-lane mask: all ones where the product equals 0x8000
      // (i.e. where it overflowed), zero elsewhere. This overwrites k.
      __ Pcmpeqw(kScratchDoubleReg, dst);
      // Flipping every bit of the overflowed lanes turns 0x8000 into 0x7FFF;
      // lanes with a zero mask are left unchanged.
      __ Pxor(dst, kScratchDoubleReg);
      break;
    }
    case kIA32I32x4SignSelect: {
      ASSEMBLE_SIMD_SIGN_SELECT(blendvps);
      break;
--- a/src/compiler/backend/ia32/instruction-codes-ia32.h
+++ b/src/compiler/backend/ia32/instruction-codes-ia32.h
@ -297,6 +297,7 @@ namespace compiler {
  V(IA32I16x8ExtMulHighI8x16U)     \
  V(IA32I16x8ExtAddPairwiseI8x16S) \
  V(IA32I16x8ExtAddPairwiseI8x16U) \
  V(IA32I16x8Q15MulRSatS)          \
  V(IA32I8x16Splat)                \
  V(IA32I8x16ExtractLaneS)         \
  V(IA32Pinsrb)                    \
--- a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
+++ b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
@ -279,6 +279,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kIA32I16x8ExtMulHighI8x16U:
    case kIA32I16x8ExtAddPairwiseI8x16S:
    case kIA32I16x8ExtAddPairwiseI8x16U:
    case kIA32I16x8Q15MulRSatS:
    case kIA32I8x16Splat:
    case kIA32I8x16ExtractLaneS:
    case kIA32Pinsrb:
--- a/src/compiler/backend/ia32/instruction-selector-ia32.cc
+++ b/src/compiler/backend/ia32/instruction-selector-ia32.cc
@ -2256,6 +2256,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
  V(I16x8ExtMulHighI8x16S)                 \
  V(I16x8ExtMulLowI8x16U)                  \
  V(I16x8ExtMulHighI8x16U)                 \
  V(I16x8Q15MulRSatS)                      \
  V(I8x16RoundingAverageU)
 #define SIMD_UNOP_LIST(V)   \
--- a/src/compiler/backend/instruction-selector.cc
+++ b/src/compiler/backend/instruction-selector.cc
@ -2748,10 +2748,12 @@ void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); }
        // && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM
        // && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_MIPS
-#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
+#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
 // TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
 void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
 #if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
 // TODO(v8:10972) Prototype i64x2 widen i32x4.
 void InstructionSelector::VisitI64x2SConvertI32x4Low(Node* node) {
  UNIMPLEMENTED();
--- a/test/cctest/wasm/test-run-wasm-simd.cc
+++ b/test/cctest/wasm/test-run-wasm-simd.cc
@ -2328,14 +2328,14 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
                              base::RoundingAverageUnsigned);
 }
-#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64
+#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
 // TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
 WASM_SIMD_TEST_NO_LOWERING(I16x8Q15MulRSatS) {
  FLAG_SCOPE(wasm_simd_post_mvp);
  RunI16x8BinOpTest<int16_t>(execution_tier, lower_simd, kExprI16x8Q15MulRSatS,
                             SaturateRoundingQMul<int16_t>);
 }
-#endif  // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64
+#endif  // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
 namespace {
 enum class MulHalf { kLow, kHigh };