S390: [wasm-simd] Prototype extended pairwise addition

Bug: v8:11086 Change-Id: Ic59e270282b5b7f3d2f8e8b46586964c69e4447a Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2618289 Reviewed-by: Junliang Yan <junyan@redhat.com> Commit-Queue: Milad Fa <mfarazma@redhat.com> Cr-Commit-Position: refs/heads/master@{#71991}
2021-01-08 15:30:24 -05:00 · 2021-01-08 15:30:24 -05:00 · ff0b78bbfd
commit ff0b78bbfd
parent 4d90b88285
4 changed files with 86 additions and 22 deletions
--- a/src/compiler/backend/s390/code-generator-s390.cc
+++ b/src/compiler/backend/s390/code-generator-s390.cc
@ -4187,6 +4187,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      break;
    }
 #undef ASSEMBLE_SIMD_I32X4_I16X8_EXT_MUL
+#define EXT_ADD_PAIRWISE(lane_size, mul_even, mul_odd)                        \
+  Simd128Register src = i.InputSimd128Register(0);                            \
+  Simd128Register dst = i.OutputSimd128Register();                            \
+  Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));         \
+  __ vrepi(kScratchDoubleReg, Operand(1), Condition(lane_size));              \
+  __ mul_even(tempFPReg1, src, kScratchDoubleReg, Condition(0), Condition(0), \
+              Condition(lane_size));                                          \
+  __ mul_odd(kScratchDoubleReg, src, kScratchDoubleReg, Condition(0),         \
+             Condition(0), Condition(lane_size));                             \
+  __ va(dst, tempFPReg1, kScratchDoubleReg, Condition(0), Condition(0),       \
+        Condition(lane_size + 1));
+    case kS390_I32x4ExtAddPairwiseI16x8S: {
+      EXT_ADD_PAIRWISE(1, vme, vmo)
+      break;
+    }
+    case kS390_I32x4ExtAddPairwiseI16x8U: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
+            Condition(0), Condition(0), Condition(3));
+      __ vsum(dst, src0, kScratchDoubleReg, Condition(0), Condition(0),
+              Condition(1));
+
+      break;
+    }
+    case kS390_I16x8ExtAddPairwiseI8x16S: {
+      EXT_ADD_PAIRWISE(0, vme, vmo)
+      break;
+    }
+    case kS390_I16x8ExtAddPairwiseI8x16U: {
+      EXT_ADD_PAIRWISE(0, vmle, vmlo)
+      break;
+    }
+#undef EXT_ADD_PAIRWISE
    case kS390_StoreCompressTagged: {
      CHECK(!instr->HasOutput());
      size_t index = 0;
--- a/src/compiler/backend/s390/instruction-codes-s390.h
+++ b/src/compiler/backend/s390/instruction-codes-s390.h
@ -294,6 +294,8 @@ namespace compiler {
  V(S390_I32x4ExtMulHighI16x8S)             \
  V(S390_I32x4ExtMulLowI16x8U)              \
  V(S390_I32x4ExtMulHighI16x8U)             \
+  V(S390_I32x4ExtAddPairwiseI16x8S)         \
+  V(S390_I32x4ExtAddPairwiseI16x8U)         \
  V(S390_I16x8Splat)                        \
  V(S390_I16x8ExtractLaneU)                 \
  V(S390_I16x8ExtractLaneS)                 \
@ -333,6 +335,8 @@ namespace compiler {
  V(S390_I16x8ExtMulHighI8x16S)             \
  V(S390_I16x8ExtMulLowI8x16U)              \
  V(S390_I16x8ExtMulHighI8x16U)             \
+  V(S390_I16x8ExtAddPairwiseI8x16S)         \
+  V(S390_I16x8ExtAddPairwiseI8x16U)         \
  V(S390_I8x16Splat)                        \
  V(S390_I8x16ExtractLaneU)                 \
  V(S390_I8x16ExtractLaneS)                 \
--- a/src/compiler/backend/s390/instruction-scheduler-s390.cc
+++ b/src/compiler/backend/s390/instruction-scheduler-s390.cc
@ -240,6 +240,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kS390_I32x4ExtMulHighI16x8S:
    case kS390_I32x4ExtMulLowI16x8U:
    case kS390_I32x4ExtMulHighI16x8U:
+    case kS390_I32x4ExtAddPairwiseI16x8S:
+    case kS390_I32x4ExtAddPairwiseI16x8U:
    case kS390_I16x8Splat:
    case kS390_I16x8ExtractLaneU:
    case kS390_I16x8ExtractLaneS:
@ -279,6 +281,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kS390_I16x8ExtMulHighI8x16S:
    case kS390_I16x8ExtMulLowI8x16U:
    case kS390_I16x8ExtMulHighI8x16U:
+    case kS390_I16x8ExtAddPairwiseI8x16S:
+    case kS390_I16x8ExtAddPairwiseI8x16U:
    case kS390_I8x16Splat:
    case kS390_I8x16ExtractLaneU:
    case kS390_I8x16ExtractLaneS:
--- a/src/execution/s390/simulator-s390.cc
+++ b/src/execution/s390/simulator-s390.cc
@ -765,7 +765,9 @@ void Simulator::EvalTableInit() {
  V(vs, VS, 0xE7F7)       /* type = VRR_C VECTOR SUBTRACT  */                  \
  V(vml, VML, 0xE7A2)     /* type = VRR_C VECTOR MULTIPLY LOW  */              \
  V(vme, VME, 0xE7A6)     /* type = VRR_C VECTOR MULTIPLY EVEN  */             \
+  V(vmle, VMLE, 0xE7A4)   /* type = VRR_C VECTOR MULTIPLY LOGICAL EVEN */      \
  V(vmo, VMO, 0xE7A7)     /* type = VRR_C VECTOR MULTIPLY ODD  */              \
+  V(vmlo, VMLO, 0xE7A5)   /* type = VRR_C VECTOR MULTIPLY LOGICAL ODD */       \
  V(vnc, VNC, 0xE769)     /* type = VRR_C VECTOR AND WITH COMPLEMENT */        \
  V(vsum, VSUM, 0xE764)   /* type = VRR_C VECTOR SUM ACROSS WORD  */           \
  V(vsumg, VSUMG, 0xE765) /* type = VRR_C VECTOR SUM ACROSS DOUBLEWORD  */     \
@ -3220,29 +3222,32 @@ EVALUATE(VML) {
    input_type src1 = get_simd_register_by_lane<input_type>(r3, i);        \
    set_simd_register_by_lane<result_type>(r1, k, src0 * src1);            \
  }
-#define VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, is_odd)                      \
-  switch (m4) {                                                           \
-    case 0: {                                                             \
-      VECTOR_MULTIPLY_EVEN_ODD_TYPE(r1, r2, r3, int8_t, int16_t, is_odd)  \
-      break;                                                              \
-    }                                                                     \
-    case 1: {                                                             \
-      VECTOR_MULTIPLY_EVEN_ODD_TYPE(r1, r2, r3, int16_t, int32_t, is_odd) \
-      break;                                                              \
-    }                                                                     \
-    case 2: {                                                             \
-      VECTOR_MULTIPLY_EVEN_ODD_TYPE(r1, r2, r3, int32_t, int64_t, is_odd) \
-      break;                                                              \
-    }                                                                     \
-    default:                                                              \
-      UNREACHABLE();                                                      \
+#define VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, is_odd, sign)                    \
+  switch (m4) {                                                               \
+    case 0: {                                                                 \
+      VECTOR_MULTIPLY_EVEN_ODD_TYPE(r1, r2, r3, sign##int8_t, sign##int16_t,  \
+                                    is_odd)                                   \
+      break;                                                                  \
+    }                                                                         \
+    case 1: {                                                                 \
+      VECTOR_MULTIPLY_EVEN_ODD_TYPE(r1, r2, r3, sign##int16_t, sign##int32_t, \
+                                    is_odd)                                   \
+      break;                                                                  \
+    }                                                                         \
+    case 2: {                                                                 \
+      VECTOR_MULTIPLY_EVEN_ODD_TYPE(r1, r2, r3, sign##int32_t, sign##int64_t, \
+                                    is_odd)                                   \
+      break;                                                                  \
+    }                                                                         \
+    default:                                                                  \
+      UNREACHABLE();                                                          \
  }
 EVALUATE(VME) {
  DCHECK_OPCODE(VME);
  DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
  USE(m5);
  USE(m6);
-  VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, false)
+  VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, false, )
  return length;
 }

@ -3251,7 +3256,24 @@ EVALUATE(VMO) {
  DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
  USE(m5);
  USE(m6);
-  VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, true)
+  VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, true, )
+  return length;
+}
+EVALUATE(VMLE) {  // Simulator handler for the VMLE opcode.
+  DCHECK_OPCODE(VMLE);
+  DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
+  USE(m5);  // m5 and m6 are decoded but not otherwise used here.
+  USE(m6);
+  VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, false, u)  // is_odd=false, sign=u
+  return length;
+}
+
+EVALUATE(VMLO) {  // Simulator handler for the VMLO opcode.
+  DCHECK_OPCODE(VMLO);
+  DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
+  USE(m5);  // m5 and m6 are decoded but not otherwise used here.
+  USE(m6);
+  VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, true, u)  // is_odd=true, sign=u
+  return length;
+}
 #undef VECTOR_MULTIPLY_EVEN_ODD
@ -3295,8 +3317,8 @@ EVALUATE(VSUM) {
  USE(m6);
  USE(m5);
  switch (m4) {
-    CASE(1, int8_t, int32_t);
-    CASE(2, int16_t, int32_t);
+    CASE(0, uint8_t, uint32_t);
+    CASE(1, uint16_t, uint32_t);
    default:
      UNREACHABLE();
  }
@ -3309,8 +3331,8 @@ EVALUATE(VSUMG) {
  USE(m6);
  USE(m5);
  switch (m4) {
-    CASE(1, int16_t, int64_t);
-    CASE(2, int32_t, int64_t);
+    CASE(1, uint16_t, uint64_t);
+    CASE(2, uint32_t, uint64_t);
    default:
      UNREACHABLE();
  }