[ia32] Match -0 - x with sign bit flip.

We can use xorps/xorpd on Intel CPUs to flip the sign bit. Ideally we'd
use an absolute 128-bit constant in the code object, as OCaml/GCC do;
however, that requires 128-bit alignment for code objects, which is not
yet implemented. So for now we materialize the mask inline.
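
For illustration only (not part of this change), here is a minimal
standalone C++ sketch of the idea: negating via -0 - x is the same as
XOR-ing the sign bit of the IEEE-754 bit pattern, and the mask can be
built from an all-ones value shifted left, mirroring the
pcmpeqd/psllq/xorpd sequence emitted by the code generator. The helper
names below are hypothetical and do not exist in V8:

  // Sketch only: mirrors pcmpeqd (all ones), psllq 63 (sign mask), xorpd.
  #include <cassert>
  #include <cmath>
  #include <cstdint>
  #include <cstring>

  static uint64_t SignBitMask64() {
    uint64_t all_ones = ~uint64_t{0};  // pcmpeqd xmm, xmm -> all ones
    return all_ones << 63;             // psllq xmm, 63 -> 0x8000000000000000
  }

  static double NegateViaSignBitFlip(double x) {
    uint64_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits ^= SignBitMask64();           // xorpd: flip only the sign bit
    double result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
  }

  int main() {
    // -0 - x flips the sign for every x, including +0; plain 0 - x would
    // map +0 to +0 instead of -0.
    assert(NegateViaSignBitFlip(1.5) == -0.0 - 1.5);
    assert(std::signbit(NegateViaSignBitFlip(0.0)));
    assert(std::signbit(-0.0 - 0.0));
    return 0;
  }

The float32 case works the same way, except the shift is 31: psllq by 31
sets bit 31 of each 64-bit lane, which is the sign bit of the scalar
float32 in the low lane that xorps actually needs.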

As a drive-by fix, don't hardcode xmm0 as the scratch double register.

R=svenpanne@chromium.org

Review URL: https://codereview.chromium.org/1064833002

Cr-Commit-Position: refs/heads/master@{#27618}
Author: bmeurer, 2015-04-07 03:43:56 -07:00 (committed by Commit bot)
Parent: 90cbede588
Commit: 49bb6617ab
4 changed files with 92 additions and 27 deletions


@ -18,6 +18,9 @@ namespace compiler {
#define __ masm()->
#define kScratchDoubleReg xmm0
// Adds IA-32 specific methods for decoding operands.
class IA32OperandConverter : public InstructionOperandConverter {
public:
@ -474,6 +477,14 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
case kSSEFloat32Sqrt:
__ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
break;
case kSSEFloat32Neg: {
// TODO(bmeurer): Use 128-bit constants.
// TODO(turbofan): Add AVX version with relaxed register constraints.
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psllq(kScratchDoubleReg, 31);
__ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
break;
}
case kSSEFloat64Cmp:
__ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
break;
@ -520,6 +531,14 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ add(esp, Immediate(kDoubleSize));
break;
}
case kSSEFloat64Neg: {
// TODO(bmeurer): Use 128-bit constants.
// TODO(turbofan): Add AVX version with relaxed register constraints.
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psllq(kScratchDoubleReg, 63);
__ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
break;
}
case kSSEFloat64Sqrt:
__ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
break;
@ -540,10 +559,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ cvttsd2si(i.OutputRegister(), i.InputOperand(0));
break;
case kSSEFloat64ToUint32: {
XMMRegister scratch = xmm0;
__ Move(scratch, -2147483648.0);
__ addsd(scratch, i.InputOperand(0));
__ cvttsd2si(i.OutputRegister(), scratch);
__ Move(kScratchDoubleReg, -2147483648.0);
__ addsd(kScratchDoubleReg, i.InputOperand(0));
__ cvttsd2si(i.OutputRegister(), kScratchDoubleReg);
__ add(i.OutputRegister(), Immediate(0x80000000));
break;
}
@ -1303,10 +1321,9 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
XMMRegister dst = g.ToDoubleRegister(destination);
__ movsd(dst, src);
} else {
// We rely on having xmm0 available as a fixed scratch register.
Operand dst = g.ToOperand(destination);
__ movsd(xmm0, src);
__ movsd(dst, xmm0);
__ movsd(kScratchDoubleReg, src);
__ movsd(dst, kScratchDoubleReg);
}
} else {
UNREACHABLE();
@ -1336,33 +1353,31 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
__ pop(dst);
__ pop(src);
} else if (source->IsDoubleRegister() && destination->IsDoubleRegister()) {
// XMM register-register swap. We rely on having xmm0
// available as a fixed scratch register.
// XMM register-register swap.
XMMRegister src = g.ToDoubleRegister(source);
XMMRegister dst = g.ToDoubleRegister(destination);
__ movaps(xmm0, src);
__ movaps(kScratchDoubleReg, src);
__ movaps(src, dst);
__ movaps(dst, xmm0);
__ movaps(dst, kScratchDoubleReg);
} else if (source->IsDoubleRegister() && destination->IsDoubleStackSlot()) {
// XMM register-memory swap. We rely on having xmm0
// available as a fixed scratch register.
// XMM register-memory swap.
XMMRegister reg = g.ToDoubleRegister(source);
Operand other = g.ToOperand(destination);
__ movsd(xmm0, other);
__ movsd(kScratchDoubleReg, other);
__ movsd(other, reg);
__ movaps(reg, xmm0);
__ movaps(reg, kScratchDoubleReg);
} else if (source->IsDoubleStackSlot() && destination->IsDoubleStackSlot()) {
// Double-width memory-to-memory.
Operand src0 = g.ToOperand(source);
Operand src1 = g.HighOperand(source);
Operand dst0 = g.ToOperand(destination);
Operand dst1 = g.HighOperand(destination);
__ movsd(xmm0, dst0); // Save destination in xmm0.
__ movsd(kScratchDoubleReg, dst0); // Save destination in scratch register.
__ push(src0); // Then use stack to copy source to destination.
__ pop(dst0);
__ push(src1);
__ pop(dst1);
__ movsd(src0, xmm0);
__ movsd(src0, kScratchDoubleReg);
} else {
// No other combinations are possible.
UNREACHABLE();


@ -38,6 +38,7 @@ namespace compiler {
V(SSEFloat32Div) \
V(SSEFloat32Max) \
V(SSEFloat32Min) \
V(SSEFloat32Neg) \
V(SSEFloat32Sqrt) \
V(SSEFloat64Cmp) \
V(SSEFloat64Add) \
@ -47,6 +48,7 @@ namespace compiler {
V(SSEFloat64Mod) \
V(SSEFloat64Max) \
V(SSEFloat64Min) \
V(SSEFloat64Neg) \
V(SSEFloat64Sqrt) \
V(SSEFloat64Round) \
V(SSEFloat32ToFloat64) \


@ -675,6 +675,13 @@ void InstructionSelector::VisitFloat64Add(Node* node) {
void InstructionSelector::VisitFloat32Sub(Node* node) {
IA32OperandGenerator g(this);
Float32BinopMatcher m(node);
if (m.left().IsMinusZero()) {
Emit(kSSEFloat32Neg, g.DefineSameAsFirst(node),
g.UseRegister(m.right().node()));
return;
}
VisitRROFloat(this, node, kAVXFloat32Sub, kSSEFloat32Sub);
}
@ -682,7 +689,8 @@ void InstructionSelector::VisitFloat32Sub(Node* node) {
void InstructionSelector::VisitFloat64Sub(Node* node) {
IA32OperandGenerator g(this);
Float64BinopMatcher m(node);
if (m.left().IsMinusZero() && m.right().IsFloat64RoundDown() &&
if (m.left().IsMinusZero()) {
if (m.right().IsFloat64RoundDown() &&
CanCover(m.node(), m.right().node())) {
if (m.right().InputAt(0)->opcode() == IrOpcode::kFloat64Sub &&
CanCover(m.right().node(), m.right().InputAt(0))) {
@ -694,6 +702,10 @@ void InstructionSelector::VisitFloat64Sub(Node* node) {
}
}
}
Emit(kSSEFloat64Neg, g.DefineSameAsFirst(node),
g.UseRegister(m.right().node()));
return;
}
VisitRROFloat(this, node, kAVXFloat64Sub, kSSEFloat64Sub);
}


@ -635,6 +635,10 @@ TEST_F(InstructionSelectorTest, Int32MulHigh) {
}
// -----------------------------------------------------------------------------
// Floating point operations.
TEST_F(InstructionSelectorTest, Float64BinopArithmetic) {
{
StreamBuilder m(this, kMachFloat64, kMachFloat64, kMachFloat64);
@ -667,6 +671,38 @@ TEST_F(InstructionSelectorTest, Float64BinopArithmetic) {
}
TEST_F(InstructionSelectorTest, Float32SubWithMinusZeroAndParameter) {
StreamBuilder m(this, kMachFloat32, kMachFloat32);
Node* const p0 = m.Parameter(0);
Node* const n = m.Float32Sub(m.Float32Constant(-0.0f), p0);
m.Return(n);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEFloat32Neg, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount());
EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
EXPECT_EQ(kFlags_none, s[0]->flags_mode());
}
TEST_F(InstructionSelectorTest, Float64SubWithMinusZeroAndParameter) {
StreamBuilder m(this, kMachFloat64, kMachFloat64);
Node* const p0 = m.Parameter(0);
Node* const n = m.Float64Sub(m.Float64Constant(-0.0), p0);
m.Return(n);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEFloat64Neg, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount());
EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
EXPECT_EQ(kFlags_none, s[0]->flags_mode());
}
// -----------------------------------------------------------------------------
// Miscellaneous.