[ia32] Match -0 - x with sign bit flip.

We can use xorps/xorpd on Intel CPUs to flip the sign bit. Ideally we'd
use an absolute 128-bit constant in the code object, as OCaml/GCC do;
however, that requires 128-bit alignment for code objects, which is not
yet implemented. So for now we materialize the sign bit mask inline.
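
For reference, here is what the emitted sequence computes, as a scalar
C++ sketch (illustration only, not part of this change). It also shows
why the selector matches -0 - x rather than 0 - x: only the former is a
true negation, since 0 - (+0) yields +0 while negation must yield -0.

  #include <cassert>
  #include <cmath>
  #include <cstdint>
  #include <cstring>

  // Scalar model of xorpd with the 0x8000000000000000 mask.
  double NegateViaSignBit(double x) {
    uint64_t bits;
    std::memcpy(&bits, &x, sizeof(bits));  // safe type pun
    bits ^= uint64_t{1} << 63;             // flip only the sign bit
    std::memcpy(&x, &bits, sizeof(bits));
    return x;
  }

  int main() {
    assert(NegateViaSignBit(1.5) == -1.5);
    assert(std::signbit(NegateViaSignBit(0.0)));  // -0 - (+0) == -0
    assert(!std::signbit(0.0 - 0.0));             // but 0 - (+0) == +0
    return 0;
  }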

As a drive-by fix, don't hardcode xmm0 as the scratch double register.

R=svenpanne@chromium.org

Review URL: https://codereview.chromium.org/1064833002

Cr-Commit-Position: refs/heads/master@{#27618}
bmeurer 2015-04-07 03:43:56 -07:00 committed by Commit bot
parent 90cbede588
commit 49bb6617ab
4 changed files with 92 additions and 27 deletions

src/compiler/ia32/code-generator-ia32.cc

@@ -18,6 +18,9 @@ namespace compiler {
 
 #define __ masm()->
 
+#define kScratchDoubleReg xmm0
+
+
 // Adds IA-32 specific methods for decoding operands.
 class IA32OperandConverter : public InstructionOperandConverter {
  public:
@@ -474,6 +477,14 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
     case kSSEFloat32Sqrt:
       __ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
       break;
+    case kSSEFloat32Neg: {
+      // TODO(bmeurer): Use 128-bit constants.
+      // TODO(turbofan): Add AVX version with relaxed register constraints.
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ psllq(kScratchDoubleReg, 31);
+      __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
+      break;
+    }
     case kSSEFloat64Cmp:
       __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
       break;
@@ -520,6 +531,14 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
       __ add(esp, Immediate(kDoubleSize));
       break;
     }
+    case kSSEFloat64Neg: {
+      // TODO(bmeurer): Use 128-bit constants.
+      // TODO(turbofan): Add AVX version with relaxed register constraints.
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ psllq(kScratchDoubleReg, 63);
+      __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
+      break;
+    }
     case kSSEFloat64Sqrt:
       __ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
       break;
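
For reference (not part of the change): the pcmpeqd/psllq pair above
materializes the sign-bit mask without loading a constant from memory.
pcmpeqd of a register with itself sets all 128 bits; psllq then shifts
each 64-bit lane left so that only the sign bit survives. Modelled on a
single lane in C++:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t lane = ~uint64_t{0};  // pcmpeqd reg, reg: all bits set
    assert((lane << 63) == 0x8000000000000000ull);  // psllq 63: f64 sign bit
    // psllq 31 leaves 0xFFFFFFFF80000000 per lane; only the low dword
    // (the scalar float) matters, and its sole set bit is the f32 sign bit.
    assert(((lane << 31) & 0xFFFFFFFFull) == 0x80000000ull);
    return 0;
  }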
@@ -540,10 +559,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
       __ cvttsd2si(i.OutputRegister(), i.InputOperand(0));
       break;
     case kSSEFloat64ToUint32: {
-      XMMRegister scratch = xmm0;
-      __ Move(scratch, -2147483648.0);
-      __ addsd(scratch, i.InputOperand(0));
-      __ cvttsd2si(i.OutputRegister(), scratch);
+      __ Move(kScratchDoubleReg, -2147483648.0);
+      __ addsd(kScratchDoubleReg, i.InputOperand(0));
+      __ cvttsd2si(i.OutputRegister(), kScratchDoubleReg);
       __ add(i.OutputRegister(), Immediate(0x80000000));
       break;
     }
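
The kSSEFloat64ToUint32 case above (only touched to use the new scratch
register) relies on a biasing trick: subtract 2^31 before the signed
conversion, then add 2^31 back as an integer. A sketch of the idea
(illustration only):

  #include <cassert>
  #include <cstdint>

  int main() {
    double input = 3000000000.0;  // out of int32 range, fits in uint32
    // Bias into signed range (mirrors: Move -2147483648.0; addsd; cvttsd2si).
    int32_t biased = static_cast<int32_t>(input + -2147483648.0);
    // Undo the bias with a wrapping integer add (mirrors: add 0x80000000).
    uint32_t result = static_cast<uint32_t>(biased) + 0x80000000u;
    assert(result == 3000000000u);
    return 0;
  }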
@@ -1303,10 +1321,9 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
       XMMRegister dst = g.ToDoubleRegister(destination);
       __ movsd(dst, src);
     } else {
-      // We rely on having xmm0 available as a fixed scratch register.
       Operand dst = g.ToOperand(destination);
-      __ movsd(xmm0, src);
-      __ movsd(dst, xmm0);
+      __ movsd(kScratchDoubleReg, src);
+      __ movsd(dst, kScratchDoubleReg);
     }
   } else {
     UNREACHABLE();
@@ -1336,33 +1353,31 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
     __ pop(dst);
     __ pop(src);
   } else if (source->IsDoubleRegister() && destination->IsDoubleRegister()) {
-    // XMM register-register swap. We rely on having xmm0
-    // available as a fixed scratch register.
+    // XMM register-register swap.
     XMMRegister src = g.ToDoubleRegister(source);
     XMMRegister dst = g.ToDoubleRegister(destination);
-    __ movaps(xmm0, src);
+    __ movaps(kScratchDoubleReg, src);
     __ movaps(src, dst);
-    __ movaps(dst, xmm0);
+    __ movaps(dst, kScratchDoubleReg);
   } else if (source->IsDoubleRegister() && destination->IsDoubleStackSlot()) {
-    // XMM register-memory swap. We rely on having xmm0
-    // available as a fixed scratch register.
+    // XMM register-memory swap.
     XMMRegister reg = g.ToDoubleRegister(source);
     Operand other = g.ToOperand(destination);
-    __ movsd(xmm0, other);
+    __ movsd(kScratchDoubleReg, other);
     __ movsd(other, reg);
-    __ movaps(reg, xmm0);
+    __ movaps(reg, kScratchDoubleReg);
   } else if (source->IsDoubleStackSlot() && destination->IsDoubleStackSlot()) {
     // Double-width memory-to-memory.
     Operand src0 = g.ToOperand(source);
     Operand src1 = g.HighOperand(source);
     Operand dst0 = g.ToOperand(destination);
     Operand dst1 = g.HighOperand(destination);
-    __ movsd(xmm0, dst0);  // Save destination in xmm0.
+    __ movsd(kScratchDoubleReg, dst0);  // Save destination in scratch register.
     __ push(src0);  // Then use stack to copy source to destination.
     __ pop(dst0);
     __ push(src1);
     __ pop(dst1);
-    __ movsd(src0, xmm0);
+    __ movsd(src0, kScratchDoubleReg);
   } else {
     // No other combinations are possible.
     UNREACHABLE();

src/compiler/ia32/instruction-codes-ia32.h

@@ -38,6 +38,7 @@ namespace compiler {
   V(SSEFloat32Div) \
   V(SSEFloat32Max) \
   V(SSEFloat32Min) \
+  V(SSEFloat32Neg) \
   V(SSEFloat32Sqrt) \
   V(SSEFloat64Cmp) \
   V(SSEFloat64Add) \
@@ -47,6 +48,7 @@ namespace compiler {
   V(SSEFloat64Mod) \
   V(SSEFloat64Max) \
   V(SSEFloat64Min) \
+  V(SSEFloat64Neg) \
   V(SSEFloat64Sqrt) \
   V(SSEFloat64Round) \
   V(SSEFloat32ToFloat64) \

src/compiler/ia32/instruction-selector-ia32.cc

@@ -675,6 +675,13 @@ void InstructionSelector::VisitFloat64Add(Node* node) {
 
 
 void InstructionSelector::VisitFloat32Sub(Node* node) {
+  IA32OperandGenerator g(this);
+  Float32BinopMatcher m(node);
+  if (m.left().IsMinusZero()) {
+    Emit(kSSEFloat32Neg, g.DefineSameAsFirst(node),
+         g.UseRegister(m.right().node()));
+    return;
+  }
   VisitRROFloat(this, node, kAVXFloat32Sub, kSSEFloat32Sub);
 }
 
@@ -682,17 +689,22 @@ void InstructionSelector::VisitFloat32Sub(Node* node) {
 void InstructionSelector::VisitFloat64Sub(Node* node) {
   IA32OperandGenerator g(this);
   Float64BinopMatcher m(node);
-  if (m.left().IsMinusZero() && m.right().IsFloat64RoundDown() &&
-      CanCover(m.node(), m.right().node())) {
-    if (m.right().InputAt(0)->opcode() == IrOpcode::kFloat64Sub &&
-        CanCover(m.right().node(), m.right().InputAt(0))) {
-      Float64BinopMatcher mright0(m.right().InputAt(0));
-      if (mright0.left().IsMinusZero()) {
-        Emit(kSSEFloat64Round | MiscField::encode(kRoundUp),
-             g.DefineAsRegister(node), g.UseRegister(mright0.right().node()));
-        return;
+  if (m.left().IsMinusZero()) {
+    if (m.right().IsFloat64RoundDown() &&
+        CanCover(m.node(), m.right().node())) {
+      if (m.right().InputAt(0)->opcode() == IrOpcode::kFloat64Sub &&
+          CanCover(m.right().node(), m.right().InputAt(0))) {
+        Float64BinopMatcher mright0(m.right().InputAt(0));
+        if (mright0.left().IsMinusZero()) {
+          Emit(kSSEFloat64Round | MiscField::encode(kRoundUp),
+               g.DefineAsRegister(node), g.UseRegister(mright0.right().node()));
+          return;
+        }
       }
     }
+    Emit(kSSEFloat64Neg, g.DefineSameAsFirst(node),
+         g.UseRegister(m.right().node()));
+    return;
   }
   VisitRROFloat(this, node, kAVXFloat64Sub, kSSEFloat64Sub);
 }
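
The nested match kept above folds -0 - RoundDown(-0 - x) into a single
round-up instruction, which relies on the identity
RoundUp(x) == -RoundDown(-x). A quick sanity check (illustration only):

  #include <cassert>
  #include <cmath>
  #include <initializer_list>

  int main() {
    for (double x : {1.25, -1.25, 2.0, -0.5, 0.0}) {
      assert(std::ceil(x) == -std::floor(-x));
    }
    return 0;
  }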

test/unittests/compiler/ia32/instruction-selector-ia32-unittest.cc

@@ -635,6 +635,10 @@ TEST_F(InstructionSelectorTest, Int32MulHigh) {
 }
 
 
+// -----------------------------------------------------------------------------
+// Floating point operations.
+
+
 TEST_F(InstructionSelectorTest, Float64BinopArithmetic) {
   {
     StreamBuilder m(this, kMachFloat64, kMachFloat64, kMachFloat64);
@@ -667,6 +671,38 @@ TEST_F(InstructionSelectorTest, Float64BinopArithmetic) {
 }
 
 
+TEST_F(InstructionSelectorTest, Float32SubWithMinusZeroAndParameter) {
+  StreamBuilder m(this, kMachFloat32, kMachFloat32);
+  Node* const p0 = m.Parameter(0);
+  Node* const n = m.Float32Sub(m.Float32Constant(-0.0f), p0);
+  m.Return(n);
+  Stream s = m.Build();
+  ASSERT_EQ(1U, s.size());
+  EXPECT_EQ(kSSEFloat32Neg, s[0]->arch_opcode());
+  ASSERT_EQ(1U, s[0]->InputCount());
+  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
+  ASSERT_EQ(1U, s[0]->OutputCount());
+  EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
+  EXPECT_EQ(kFlags_none, s[0]->flags_mode());
+}
+
+
+TEST_F(InstructionSelectorTest, Float64SubWithMinusZeroAndParameter) {
+  StreamBuilder m(this, kMachFloat64, kMachFloat64);
+  Node* const p0 = m.Parameter(0);
+  Node* const n = m.Float64Sub(m.Float64Constant(-0.0), p0);
+  m.Return(n);
+  Stream s = m.Build();
+  ASSERT_EQ(1U, s.size());
+  EXPECT_EQ(kSSEFloat64Neg, s[0]->arch_opcode());
+  ASSERT_EQ(1U, s[0]->InputCount());
+  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
+  ASSERT_EQ(1U, s[0]->OutputCount());
+  EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
+  EXPECT_EQ(kFlags_none, s[0]->flags_mode());
+}
+
+
 // -----------------------------------------------------------------------------
 // Miscellaneous.