[wasm-simd] Implement f64x2 comparisons for arm

Bug: v8:9813
Change-Id: I716ed7c2802c38a4b4c8973db4e3bc50e16cec39
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1872930
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64788}
2019-10-30 11:06:29 -07:00 · 2019-10-30 11:06:29 -07:00 · b6edadc09b
commit b6edadc09b
parent 92a5b3998d
6 changed files with 94 additions and 11 deletions
--- a/src/compiler/backend/arm/code-generator-arm.cc
+++ b/src/compiler/backend/arm/code-generator-arm.cc
@ -1837,6 +1837,78 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      break;
    }
 #undef ASSEMBLE_F64X2_ARITHMETIC_BINOP
+    case kArmF64x2Eq: {  // Lane-wise f64 ==: lane is all ones if equal, else 0.
+      UseScratchRegisterScope temps(tasm());
+      Register scratch = temps.Acquire();
+      __ mov(scratch, Operand(0));  // Low lane: start from "false".
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
+                               i.InputSimd128Register(1).low());
+      __ mov(scratch, Operand(-1), LeaveCC, eq);  // eq fails on NaN (unordered).
+      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);  // Both words.
+
+      __ mov(scratch, Operand(0));  // High lane: same sequence.
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
+                               i.InputSimd128Register(1).high());
+      __ mov(scratch, Operand(-1), LeaveCC, eq);
+      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
+      break;
+    }
+    case kArmF64x2Ne: {  // Lane-wise f64 !=: lane is all ones if unequal, else 0.
+      UseScratchRegisterScope temps(tasm());
+      Register scratch = temps.Acquire();
+      __ mov(scratch, Operand(0));  // Low lane: start from "false".
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
+                               i.InputSimd128Register(1).low());
+      __ mov(scratch, Operand(-1), LeaveCC, ne);  // ne also holds for NaN inputs.
+      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);  // Both words.
+
+      __ mov(scratch, Operand(0));  // High lane: same sequence.
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
+                               i.InputSimd128Register(1).high());
+      __ mov(scratch, Operand(-1), LeaveCC, ne);
+      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
+      break;
+    }
+    case kArmF64x2Lt: {  // Lane-wise f64 <: lane is all ones if lhs < rhs, else 0.
+      UseScratchRegisterScope temps(tasm());
+      Register scratch = temps.Acquire();
+      __ mov(scratch, Operand(0));  // Low lane: start from "false".
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
+                               i.InputSimd128Register(1).low());
+      __ mov(scratch, Operand(-1), LeaveCC, lt);
+      // Check for NaN: unordered sets V, which also satisfies lt (N != V).
+      __ mov(scratch, Operand(0), LeaveCC, vs);
+      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);  // Both words.
+
+      __ mov(scratch, Operand(0));  // High lane: same sequence.
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
+                               i.InputSimd128Register(1).high());
+      __ mov(scratch, Operand(-1), LeaveCC, lt);
+      // Check for NaN: unordered sets V, which also satisfies lt (N != V).
+      __ mov(scratch, Operand(0), LeaveCC, vs);
+      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
+      break;
+    }
+    case kArmF64x2Le: {  // Lane-wise f64 <=: lane is all ones if lhs <= rhs, else 0.
+      UseScratchRegisterScope temps(tasm());
+      Register scratch = temps.Acquire();
+      __ mov(scratch, Operand(0));  // Low lane: start from "false".
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
+                               i.InputSimd128Register(1).low());
+      __ mov(scratch, Operand(-1), LeaveCC, le);
+      // Check for NaN: unordered sets V, which also satisfies le (Z or N != V).
+      __ mov(scratch, Operand(0), LeaveCC, vs);
+      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);  // Both words.
+
+      __ mov(scratch, Operand(0));  // High lane: same sequence.
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
+                               i.InputSimd128Register(1).high());
+      __ mov(scratch, Operand(-1), LeaveCC, le);
+      // Check for NaN: unordered sets V, which also satisfies le (Z or N != V).
+      __ mov(scratch, Operand(0), LeaveCC, vs);
+      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
+      break;
+    }
    case kArmF32x4Splat: {
      int src_code = i.InputFloatRegister(0).code();
      __ vdup(Neon32, i.OutputSimd128Register(),
--- a/src/compiler/backend/arm/instruction-codes-arm.h
+++ b/src/compiler/backend/arm/instruction-codes-arm.h
@ -138,6 +138,10 @@ namespace compiler {
  V(ArmF64x2Sub)                   \
  V(ArmF64x2Mul)                   \
  V(ArmF64x2Div)                   \
+  V(ArmF64x2Eq)                    \
+  V(ArmF64x2Ne)                    \
+  V(ArmF64x2Lt)                    \
+  V(ArmF64x2Le)                    \
  V(ArmF32x4Splat)                 \
  V(ArmF32x4ExtractLane)           \
  V(ArmF32x4ReplaceLane)           \
--- a/src/compiler/backend/arm/instruction-scheduler-arm.cc
+++ b/src/compiler/backend/arm/instruction-scheduler-arm.cc
@ -118,6 +118,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kArmF64x2Sub:
    case kArmF64x2Mul:
    case kArmF64x2Div:
+    case kArmF64x2Eq:
+    case kArmF64x2Ne:
+    case kArmF64x2Lt:
+    case kArmF64x2Le:
    case kArmF32x4Splat:
    case kArmF32x4ExtractLane:
    case kArmF32x4ReplaceLane:
--- a/src/compiler/backend/arm/instruction-selector-arm.cc
+++ b/src/compiler/backend/arm/instruction-selector-arm.cc
@ -2448,6 +2448,10 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
  V(F64x2Sub, kArmF64x2Sub)                     \
  V(F64x2Mul, kArmF64x2Mul)                     \
  V(F64x2Div, kArmF64x2Div)                     \
+  V(F64x2Eq, kArmF64x2Eq)                       \
+  V(F64x2Ne, kArmF64x2Ne)                       \
+  V(F64x2Lt, kArmF64x2Lt)                       \
+  V(F64x2Le, kArmF64x2Le)                       \
  V(F32x4Add, kArmF32x4Add)                     \
  V(F32x4AddHoriz, kArmF32x4AddHoriz)           \
  V(F32x4Sub, kArmF32x4Sub)                     \
--- a/src/compiler/backend/instruction-selector.cc
+++ b/src/compiler/backend/instruction-selector.cc
@ -2670,10 +2670,6 @@ void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
 }
 #if !V8_TARGET_ARCH_ARM64
 #if !V8_TARGET_ARCH_IA32
-void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitF64x2Le(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI64x2Neg(Node* node) { UNIMPLEMENTED(); }
--- a/test/cctest/wasm/test-run-wasm-simd.cc
+++ b/test/cctest/wasm/test-run-wasm-simd.cc
@ -286,7 +286,6 @@ T Sqrt(T a) {
  return std::sqrt(a);
 }

-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
 // only used for F64x2 tests below; returns -1 (all bits set) if a == b, else 0
 int64_t Equal(double a, double b) { return a == b ? -1 : 0; }

@ -384,7 +383,6 @@ bool ExpectFused(ExecutionTier tier) {
 #endif
 }
 #endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
-#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32

 }  // namespace

@ -1368,7 +1366,6 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
  RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div);
 }

-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
 void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                           WasmOpcode opcode, DoubleCompareOp expected_op) {
  WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);
@ -1378,7 +1375,12 @@ void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
  byte value1 = 0, value2 = 1;
  byte temp1 = r.AllocateLocal(kWasmS128);
  byte temp2 = r.AllocateLocal(kWasmS128);
+  // Make the two lanes compare differently (written as lane 1, lane 0):
+  // temp1 = y, x and temp2 = y, y, so lane 0 tests (x, y) and lane 1 (y, y).
  BUILD(r, WASM_SET_LOCAL(temp1, WASM_SIMD_F64x2_SPLAT(WASM_GET_LOCAL(value1))),
+        WASM_SET_LOCAL(temp1,
+                       WASM_SIMD_F64x2_REPLACE_LANE(1, WASM_GET_LOCAL(temp1),
+                                                    WASM_GET_LOCAL(value2))),
        WASM_SET_LOCAL(temp2, WASM_SIMD_F64x2_SPLAT(WASM_GET_LOCAL(value2))),
        WASM_SET_GLOBAL(0, WASM_SIMD_BINOP(opcode, WASM_GET_LOCAL(temp1),
                                           WASM_GET_LOCAL(temp2))),
@ -1391,10 +1393,10 @@ void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
      double diff = x - y;  // Model comparison as subtraction.
      if (!PlatformCanRepresent(diff)) continue;
      r.Call(x, y);
-      int64_t expected = expected_op(x, y);
-      for (int i = 0; i < 2; i++) {
-        CHECK_EQ(expected, ReadLittleEndianValue<int64_t>(&g[i]));
-      }
+      int64_t expected0 = expected_op(x, y);
+      int64_t expected1 = expected_op(y, y);
+      CHECK_EQ(expected0, ReadLittleEndianValue<int64_t>(&g[0]));
+      CHECK_EQ(expected1, ReadLittleEndianValue<int64_t>(&g[1]));
    }
  }
 }
@ -1423,6 +1425,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Le) {
  RunF64x2CompareOpTest(execution_tier, lower_simd, kExprF64x2Le, LessEqual);
 }

+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
 WASM_SIMD_TEST_NO_LOWERING(F64x2Min) {
  RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Min, JSMin);
 }