[ia32] Match -0 - x with sign bit flip.

We can use xorps/xorpd on Intel CPUs to flip the sign bit. Ideally we'd
use an absolute 128-bit constant in the code object, as OCaml/GCC do;
however, that requires 128-bit alignment for code objects, which is not
yet implemented. So for now we materialize the mask inline.
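
For illustration only (not part of this change), here is a minimal
standalone C++ sketch of the idea: negating via -0 - x is the same as
XOR-ing the sign bit of the IEEE-754 bit pattern, and the mask can be
built from an all-ones value shifted left, mirroring the
pcmpeqd/psllq/xorpd sequence emitted by the code generator. The helper
names below are hypothetical and do not exist in V8:

  // Sketch only: mirrors pcmpeqd (all ones), psllq 63 (sign mask), xorpd.
  #include <cassert>
  #include <cmath>
  #include <cstdint>
  #include <cstring>

  static uint64_t SignBitMask64() {
    uint64_t all_ones = ~uint64_t{0};  // pcmpeqd xmm, xmm -> all ones
    return all_ones << 63;             // psllq xmm, 63 -> 0x8000000000000000
  }

  static double NegateViaSignBitFlip(double x) {
    uint64_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits ^= SignBitMask64();           // xorpd: flip only the sign bit
    double result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
  }

  int main() {
    // -0 - x flips the sign for every x, including +0; plain 0 - x would
    // map +0 to +0 instead of -0.
    assert(NegateViaSignBitFlip(1.5) == -0.0 - 1.5);
    assert(std::signbit(NegateViaSignBitFlip(0.0)));
    assert(std::signbit(-0.0 - 0.0));
    return 0;
  }

The float32 case works the same way, except the shift is 31: psllq by 31
sets bit 31 of each 64-bit lane, which is the sign bit of the scalar
float32 in the low lane that xorps actually needs.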

As a drive-by fix, don't hardcode xmm0 as the scratch double register.

R=svenpanne@chromium.org

Review URL: https://codereview.chromium.org/1064833002

Cr-Commit-Position: refs/heads/master@{#27618}
Author: bmeurer, 2015-04-07 03:43:56 -07:00 (committed by Commit bot)
Parent: 90cbede588
Commit: 49bb6617ab
4 changed files with 92 additions and 27 deletions


@ -18,6 +18,9 @@ namespace compiler {
#define __ masm()->
#define kScratchDoubleReg xmm0
// Adds IA-32 specific methods for decoding operands.
class IA32OperandConverter : public InstructionOperandConverter {
public:
@ -474,6 +477,14 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
case kSSEFloat32Sqrt:
__ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
break;
case kSSEFloat32Neg: {
// TODO(bmeurer): Use 128-bit constants.
// TODO(turbofan): Add AVX version with relaxed register constraints.
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psllq(kScratchDoubleReg, 31);
__ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
break;
}
case kSSEFloat64Cmp:
__ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
break;
@ -520,6 +531,14 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ add(esp, Immediate(kDoubleSize));
break;
}
case kSSEFloat64Neg: {
// TODO(bmeurer): Use 128-bit constants.
// TODO(turbofan): Add AVX version with relaxed register constraints.
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psllq(kScratchDoubleReg, 63);
__ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
break;
}
case kSSEFloat64Sqrt:
__ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
break;
@ -540,10 +559,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ cvttsd2si(i.OutputRegister(), i.InputOperand(0));
break;
case kSSEFloat64ToUint32: {
XMMRegister scratch = xmm0;
__ Move(scratch, -2147483648.0);
__ addsd(scratch, i.InputOperand(0));
__ cvttsd2si(i.OutputRegister(), scratch);
__ Move(kScratchDoubleReg, -2147483648.0);
__ addsd(kScratchDoubleReg, i.InputOperand(0));
__ cvttsd2si(i.OutputRegister(), kScratchDoubleReg);
__ add(i.OutputRegister(), Immediate(0x80000000));
break;
}
@ -1303,10 +1321,9 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
XMMRegister dst = g.ToDoubleRegister(destination);
__ movsd(dst, src);
} else {
// We rely on having xmm0 available as a fixed scratch register.
Operand dst = g.ToOperand(destination);
__ movsd(xmm0, src);
__ movsd(dst, xmm0);
__ movsd(kScratchDoubleReg, src);
__ movsd(dst, kScratchDoubleReg);
}
} else {
UNREACHABLE();
@ -1336,33 +1353,31 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
__ pop(dst);
__ pop(src);
} else if (source->IsDoubleRegister() && destination->IsDoubleRegister()) {
// XMM register-register swap. We rely on having xmm0
// available as a fixed scratch register.
// XMM register-register swap.
XMMRegister src = g.ToDoubleRegister(source);
XMMRegister dst = g.ToDoubleRegister(destination);
__ movaps(xmm0, src);
__ movaps(kScratchDoubleReg, src);
__ movaps(src, dst);
__ movaps(dst, xmm0);
__ movaps(dst, kScratchDoubleReg);
} else if (source->IsDoubleRegister() && destination->IsDoubleStackSlot()) {
// XMM register-memory swap. We rely on having xmm0
// available as a fixed scratch register.
// XMM register-memory swap.
XMMRegister reg = g.ToDoubleRegister(source);
Operand other = g.ToOperand(destination);
__ movsd(xmm0, other);
__ movsd(kScratchDoubleReg, other);
__ movsd(other, reg);
__ movaps(reg, xmm0);
__ movaps(reg, kScratchDoubleReg);
} else if (source->IsDoubleStackSlot() && destination->IsDoubleStackSlot()) {
// Double-width memory-to-memory.
Operand src0 = g.ToOperand(source);
Operand src1 = g.HighOperand(source);
Operand dst0 = g.ToOperand(destination);
Operand dst1 = g.HighOperand(destination);
__ movsd(xmm0, dst0); // Save destination in xmm0.
__ movsd(kScratchDoubleReg, dst0); // Save destination in scratch register.
__ push(src0); // Then use stack to copy source to destination.
__ pop(dst0);
__ push(src1);
__ pop(dst1);
__ movsd(src0, xmm0);
__ movsd(src0, kScratchDoubleReg);
} else {
// No other combinations are possible.
UNREACHABLE();


@ -38,6 +38,7 @@ namespace compiler {
V(SSEFloat32Div) \
V(SSEFloat32Max) \
V(SSEFloat32Min) \
V(SSEFloat32Neg) \
V(SSEFloat32Sqrt) \
V(SSEFloat64Cmp) \
V(SSEFloat64Add) \
@ -47,6 +48,7 @@ namespace compiler {
V(SSEFloat64Mod) \
V(SSEFloat64Max) \
V(SSEFloat64Min) \
V(SSEFloat64Neg) \
V(SSEFloat64Sqrt) \
V(SSEFloat64Round) \
V(SSEFloat32ToFloat64) \


@ -675,6 +675,13 @@ void InstructionSelector::VisitFloat64Add(Node* node) {
void InstructionSelector::VisitFloat32Sub(Node* node) {
IA32OperandGenerator g(this);
Float32BinopMatcher m(node);
if (m.left().IsMinusZero()) {
Emit(kSSEFloat32Neg, g.DefineSameAsFirst(node),
g.UseRegister(m.right().node()));
return;
}
VisitRROFloat(this, node, kAVXFloat32Sub, kSSEFloat32Sub);
}
@ -682,7 +689,8 @@ void InstructionSelector::VisitFloat32Sub(Node* node) {
void InstructionSelector::VisitFloat64Sub(Node* node) {
IA32OperandGenerator g(this);
Float64BinopMatcher m(node);
if (m.left().IsMinusZero() && m.right().IsFloat64RoundDown() &&
if (m.left().IsMinusZero()) {
if (m.right().IsFloat64RoundDown() &&
CanCover(m.node(), m.right().node())) {
if (m.right().InputAt(0)->opcode() == IrOpcode::kFloat64Sub &&
CanCover(m.right().node(), m.right().InputAt(0))) {
@ -694,6 +702,10 @@ void InstructionSelector::VisitFloat64Sub(Node* node) {
}
}
}
Emit(kSSEFloat64Neg, g.DefineSameAsFirst(node),
g.UseRegister(m.right().node()));
return;
}
VisitRROFloat(this, node, kAVXFloat64Sub, kSSEFloat64Sub);
}


@ -635,6 +635,10 @@ TEST_F(InstructionSelectorTest, Int32MulHigh) {
}
// -----------------------------------------------------------------------------
// Floating point operations.
TEST_F(InstructionSelectorTest, Float64BinopArithmetic) {
{
StreamBuilder m(this, kMachFloat64, kMachFloat64, kMachFloat64);
@ -667,6 +671,38 @@ TEST_F(InstructionSelectorTest, Float64BinopArithmetic) {
}
TEST_F(InstructionSelectorTest, Float32SubWithMinusZeroAndParameter) {
StreamBuilder m(this, kMachFloat32, kMachFloat32);
Node* const p0 = m.Parameter(0);
Node* const n = m.Float32Sub(m.Float32Constant(-0.0f), p0);
m.Return(n);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEFloat32Neg, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount());
EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
EXPECT_EQ(kFlags_none, s[0]->flags_mode());
}
TEST_F(InstructionSelectorTest, Float64SubWithMinusZeroAndParameter) {
StreamBuilder m(this, kMachFloat64, kMachFloat64);
Node* const p0 = m.Parameter(0);
Node* const n = m.Float64Sub(m.Float64Constant(-0.0), p0);
m.Return(n);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEFloat64Neg, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount());
EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
EXPECT_EQ(kFlags_none, s[0]->flags_mode());
}
// -----------------------------------------------------------------------------
// Miscellaneous.