[x86] Allow (v)divsd->(v)mulsd to execute in parallel.
This tweak was already present in Crankshaft for the non-AVX case. As it turns out, it is relevant with AVX as well. The same optimization is now also applied in TurboFan. R=dcarney@chromium.org Review URL: https://codereview.chromium.org/1081033003 Cr-Commit-Position: refs/heads/master@{#27774}
This commit is contained in:
parent
1dbc432729
commit
e21f9ab42b
@ -468,6 +468,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
|
||||
break;
|
||||
case kSSEFloat32Div:
|
||||
__ divss(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulss depending on the result.
|
||||
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
|
||||
break;
|
||||
case kSSEFloat32Max:
|
||||
__ maxss(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
@ -506,6 +509,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
|
||||
break;
|
||||
case kSSEFloat64Div:
|
||||
__ divsd(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulsd depending on the result.
|
||||
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
|
||||
break;
|
||||
case kSSEFloat64Max:
|
||||
__ maxsd(i.InputDoubleRegister(0), i.InputOperand(1));
|
||||
@ -629,6 +635,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
|
||||
CpuFeatureScope avx_scope(masm(), AVX);
|
||||
__ vdivss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulss depending on the result.
|
||||
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
|
||||
break;
|
||||
}
|
||||
case kAVXFloat32Max: {
|
||||
@ -665,6 +674,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
|
||||
CpuFeatureScope avx_scope(masm(), AVX);
|
||||
__ vdivsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
|
||||
i.InputOperand(1));
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulsd depending on the result.
|
||||
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
|
||||
break;
|
||||
}
|
||||
case kAVXFloat64Max: {
|
||||
|
@ -726,6 +726,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
|
||||
break;
|
||||
case kSSEFloat32Div:
|
||||
ASSEMBLE_SSE_BINOP(divss);
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulss depending on the result.
|
||||
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
|
||||
break;
|
||||
case kSSEFloat32Abs: {
|
||||
// TODO(bmeurer): Use RIP relative 128-bit constants.
|
||||
@ -767,6 +770,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
|
||||
break;
|
||||
case kSSEFloat64Div:
|
||||
ASSEMBLE_SSE_BINOP(divsd);
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulsd depending on the result.
|
||||
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
|
||||
break;
|
||||
case kSSEFloat64Mod: {
|
||||
__ subq(rsp, Immediate(kDoubleSize));
|
||||
@ -919,6 +925,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
|
||||
break;
|
||||
case kAVXFloat32Div:
|
||||
ASSEMBLE_AVX_BINOP(vdivss);
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulss depending on the result.
|
||||
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
|
||||
break;
|
||||
case kAVXFloat32Max:
|
||||
ASSEMBLE_AVX_BINOP(vmaxss);
|
||||
@ -946,6 +955,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
|
||||
break;
|
||||
case kAVXFloat64Div:
|
||||
ASSEMBLE_AVX_BINOP(vdivsd);
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulsd depending on the result.
|
||||
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
|
||||
break;
|
||||
case kAVXFloat64Max:
|
||||
ASSEMBLE_AVX_BINOP(vmaxsd);
|
||||
|
@ -2040,10 +2040,10 @@ void LCodeGen::DoArithmeticD(LArithmeticD* instr) {
|
||||
} else {
|
||||
DCHECK(result.is(left));
|
||||
__ divsd(left, right);
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a mulsd depending on the result
|
||||
__ movaps(left, left);
|
||||
}
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulsd depending on the result
|
||||
__ movaps(result, result);
|
||||
break;
|
||||
case Token::MOD: {
|
||||
// Pass two doubles as arguments on the stack.
|
||||
|
@ -2076,10 +2076,10 @@ void LCodeGen::DoArithmeticD(LArithmeticD* instr) {
|
||||
} else {
|
||||
DCHECK(result.is(left));
|
||||
__ divsd(left, right);
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a mulsd depending on the result
|
||||
__ movaps(left, left);
|
||||
}
|
||||
// Don't delete this mov. It may improve performance on some CPUs,
|
||||
// when there is a (v)mulsd depending on the result
|
||||
__ movaps(result, result);
|
||||
break;
|
||||
case Token::MOD: {
|
||||
XMMRegister xmm_scratch = double_scratch0();
|
||||
|
Loading…
Reference in New Issue
Block a user