S390 [liftoff]: Implement simd fp32 to i32 conversion

I32x4UConvertF32x4 is also slightly optimized by removing 2 instructions.

Change-Id: Ie61fbd34628beb2410ae3ef916de7c3119c7ad9c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3429463
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#78907}
2022-02-01 15:40:45 -05:00 · 2022-02-01 15:40:45 -05:00 · 7f47473a39
commit 7f47473a39
parent b345204579
5 changed files with 50 additions and 43 deletions
--- a/src/codegen/s390/macro-assembler-s390.cc
+++ b/src/codegen/s390/macro-assembler-s390.cc
@@ -5601,6 +5601,42 @@ void TurboAssembler::V128AnyTrue(Register dst, Simd128Register src,
  locgr(Condition(8), dst, scratch);
 }

+#define CONVERT_FLOAT_TO_INT32(convert, dst, src, scratch1, scratch2) \
+  for (int index = 0; index < 4; index++) {                           \
+    vlgv(scratch2, src, MemOperand(r0, index), Condition(2));         \
+    MovIntToFloat(scratch1, scratch2);                                \
+    convert(scratch2, scratch1, kRoundToZero);                        \
+    vlvg(dst, scratch2, MemOperand(r0, index), Condition(2));         \
+  }
+void TurboAssembler::I32x4SConvertF32x4(Simd128Register dst,
+                                        Simd128Register src,
+                                        Simd128Register scratch1,
+                                        Register scratch2) {
+  // NaN to 0: vfce flags non-NaN lanes (src == src); vn zeroes the others.
+  vfce(scratch1, src, src, Condition(0), Condition(0), Condition(2));
+  vn(dst, src, scratch1, Condition(0), Condition(0), Condition(0));
+  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
+    vcgd(dst, dst, Condition(5), Condition(0), Condition(2));
+  } else {  // No VECTOR_ENHANCE_FACILITY_2: convert one lane at a time.
+    CONVERT_FLOAT_TO_INT32(ConvertFloat32ToInt32, dst, dst, scratch1, scratch2)
+  }
+}
+
+void TurboAssembler::I32x4UConvertF32x4(Simd128Register dst,
+                                        Simd128Register src,
+                                        Simd128Register scratch1,
+                                        Register scratch2) {
+  // No masking step is needed: both vclgd and ConvertFloat32ToUnsignedInt32
+  // already convert NaN and negative lanes to 0.
+  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
+    vclgd(dst, src, Condition(5), Condition(0), Condition(2));
+  } else {  // No VECTOR_ENHANCE_FACILITY_2: convert one lane at a time.
+    CONVERT_FLOAT_TO_INT32(ConvertFloat32ToUnsignedInt32, dst, src, scratch1,
+                           scratch2)
+  }
+}
+#undef CONVERT_FLOAT_TO_INT32
+
 // Vector LE Load and Transform instructions.
 #ifdef V8_TARGET_BIG_ENDIAN
 #define IS_BIG_ENDIAN true
--- a/src/codegen/s390/macro-assembler-s390.h
+++ b/src/codegen/s390/macro-assembler-s390.h
@@ -1114,6 +1114,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
  void I8x16BitMask(Register dst, Simd128Register src, Register scratch1,
                    Register scratch2, Simd128Register scratch3);
  void V128AnyTrue(Register dst, Simd128Register src, Register scratch);
+  void I32x4SConvertF32x4(Simd128Register dst, Simd128Register src,
+                          Simd128Register scratch1, Register scratch2);
+  void I32x4UConvertF32x4(Simd128Register dst, Simd128Register src,
+                          Simd128Register scratch1, Register scratch2);

 #define SIMD_UNOP_LIST(V)   \
  V(F64x2Abs)               \
--- a/src/compiler/backend/s390/code-generator-s390.cc
+++ b/src/compiler/backend/s390/code-generator-s390.cc
@@ -2880,49 +2880,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      break;
    }
    // vector conversions
-#define CONVERT_FLOAT_TO_INT32(convert)                             \
-  for (int index = 0; index < 4; index++) {                         \
-    __ vlgv(kScratchReg, kScratchDoubleReg, MemOperand(r0, index),  \
-            Condition(2));                                          \
-    __ MovIntToFloat(tempFPReg1, kScratchReg);                      \
-    __ convert(kScratchReg, tempFPReg1, kRoundToZero);              \
-    __ vlvg(dst, kScratchReg, MemOperand(r0, index), Condition(2)); \
-  }
    case kS390_I32x4SConvertF32x4: {
-      Simd128Register src = i.InputSimd128Register(0);
-      Simd128Register dst = i.OutputSimd128Register();
-      Simd128Register tempFPReg1 = i.ToDoubleRegister(instr->TempAt(0));
-      DCHECK_NE(dst, tempFPReg1);
-      // NaN to 0
-      __ vlr(kScratchDoubleReg, src, Condition(0), Condition(0), Condition(0));
-      __ vfce(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
-              Condition(0), Condition(0), Condition(2));
-      __ vn(kScratchDoubleReg, src, kScratchDoubleReg, Condition(0),
-            Condition(0), Condition(0));
-      if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
-        __ vcgd(i.OutputSimd128Register(), kScratchDoubleReg, Condition(5),
-                Condition(0), Condition(2));
-      } else {
-        CONVERT_FLOAT_TO_INT32(ConvertFloat32ToInt32)
-      }
+      __ I32x4SConvertF32x4(i.OutputSimd128Register(),
+                            i.InputSimd128Register(0), kScratchDoubleReg,
+                            kScratchReg);
      break;
    }
    case kS390_I32x4UConvertF32x4: {
-      Simd128Register src = i.InputSimd128Register(0);
-      Simd128Register dst = i.OutputSimd128Register();
-      Simd128Register tempFPReg1 = i.ToDoubleRegister(instr->TempAt(0));
-      DCHECK_NE(dst, tempFPReg1);
-      // NaN to 0, negative to 0
-      __ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
-            Condition(0), Condition(0), Condition(0));
-      __ vfmax(kScratchDoubleReg, src, kScratchDoubleReg, Condition(1),
-               Condition(0), Condition(2));
-      if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
-        __ vclgd(i.OutputSimd128Register(), kScratchDoubleReg, Condition(5),
-                 Condition(0), Condition(2));
-      } else {
-        CONVERT_FLOAT_TO_INT32(ConvertFloat32ToUnsignedInt32)
-      }
+      __ I32x4UConvertF32x4(i.OutputSimd128Register(),
+                            i.InputSimd128Register(0), kScratchDoubleReg,
+                            kScratchReg);
      break;
    }
 #undef CONVERT_FLOAT_TO_INT32
--- a/src/compiler/backend/s390/instruction-selector-s390.cc
+++ b/src/compiler/backend/s390/instruction-selector-s390.cc
@@ -2565,6 +2565,8 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
  V(I64x2AllTrue)            \
  V(I32x4Neg)                \
  V(I32x4Abs)                \
+  V(I32x4SConvertF32x4)      \
+  V(I32x4UConvertF32x4)      \
  V(I32x4SConvertI16x8Low)   \
  V(I32x4SConvertI16x8High)  \
  V(I32x4UConvertI16x8Low)   \
@@ -2593,8 +2595,6 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
  V(V128AnyTrue)

 #define SIMD_UNOP_UNIQUE_REGISTER_LIST(V) \
-  V(I32x4SConvertF32x4)                   \
-  V(I32x4UConvertF32x4)                   \
  V(I32x4ExtAddPairwiseI16x8S)            \
  V(I32x4ExtAddPairwiseI16x8U)            \
  V(I16x8ExtAddPairwiseI8x16S)            \
--- a/src/wasm/baseline/s390/liftoff-assembler-s390.h
+++ b/src/wasm/baseline/s390/liftoff-assembler-s390.h
@@ -2682,12 +2682,12 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,

 void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
-  bailout(kSimd, "i32x4_sconvert_f32x4");
+  I32x4SConvertF32x4(dst.fp(), src.fp(), kScratchDoubleReg, r0);
 }

 void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
-  bailout(kSimd, "i32x4_uconvert_f32x4");
+  I32x4UConvertF32x4(dst.fp(), src.fp(), kScratchDoubleReg, r0);
 }

 void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,