[turbofan] avoid xchg instruction on Intel

On Intel, xchg stalls the pipeline. We use xchg to implement swap
moves. In a separate exploration, the presence of xchg in a very hot
loop, due to a change in register allocation, lead to over 20% regression.

Simply changing that instruction with push/mov/pop (almost) eliminated
the regression.

In light of that, I removed uses of xchg. This leads to more instructions,
though. That is particularly problematic for long cycles, which, today,
we translate to successions of swaps.

I plan to address this cycle issue in a separate change. For now, the
goal is to unblock the initial work that lead here.

Review URL: https://codereview.chromium.org/1580233003

Cr-Commit-Position: refs/heads/master@{#33282}
This commit is contained in:
mtrofin 2016-01-13 21:13:50 -08:00 committed by Commit bot
parent 1357357647
commit 7440edae1d
6 changed files with 41 additions and 10 deletions

View File

@ -1616,10 +1616,19 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
// Register-register.
Register src = g.ToRegister(source);
Register dst = g.ToRegister(destination);
__ xchg(dst, src);
__ push(src);
__ mov(src, dst);
__ pop(dst);
} else if (source->IsRegister() && destination->IsStackSlot()) {
// Register-memory.
__ xchg(g.ToRegister(source), g.ToOperand(destination));
Register src = g.ToRegister(source);
__ push(src);
frame_access_state()->IncreaseSPDelta(1);
Operand dst = g.ToOperand(destination);
__ mov(src, dst);
frame_access_state()->IncreaseSPDelta(-1);
dst = g.ToOperand(destination);
__ pop(dst);
} else if (source->IsStackSlot() && destination->IsStackSlot()) {
// Memory-memory.
Operand dst1 = g.ToOperand(destination);

View File

@ -2046,11 +2046,20 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
// combinations are possible.
if (source->IsRegister() && destination->IsRegister()) {
// Register-register.
__ xchgq(g.ToRegister(source), g.ToRegister(destination));
Register src = g.ToRegister(source);
Register dst = g.ToRegister(destination);
__ movq(kScratchRegister, src);
__ movq(src, dst);
__ movq(dst, kScratchRegister);
} else if (source->IsRegister() && destination->IsStackSlot()) {
Register src = g.ToRegister(source);
__ pushq(src);
frame_access_state()->IncreaseSPDelta(1);
Operand dst = g.ToOperand(destination);
__ xchgq(src, dst);
__ movq(src, dst);
frame_access_state()->IncreaseSPDelta(-1);
dst = g.ToOperand(destination);
__ popq(dst);
} else if ((source->IsStackSlot() && destination->IsStackSlot()) ||
(source->IsDoubleStackSlot() &&
destination->IsDoubleStackSlot())) {
@ -2059,8 +2068,13 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
Operand src = g.ToOperand(source);
Operand dst = g.ToOperand(destination);
__ movq(tmp, dst);
__ xchgq(tmp, src);
__ movq(dst, tmp);
__ pushq(src);
frame_access_state()->IncreaseSPDelta(1);
src = g.ToOperand(source);
__ movq(src, tmp);
frame_access_state()->IncreaseSPDelta(-1);
dst = g.ToOperand(destination);
__ popq(dst);
} else if (source->IsDoubleRegister() && destination->IsDoubleRegister()) {
// XMM register-register swap. We rely on having xmm0
// available as a fixed scratch register.

View File

@ -369,7 +369,9 @@ void LGapResolver::EmitSwap(int index) {
// Register-register.
Register src = cgen_->ToRegister(source);
Register dst = cgen_->ToRegister(destination);
__ xchg(dst, src);
__ push(src);
__ mov(src, dst);
__ pop(dst);
} else if ((source->IsRegister() && destination->IsStackSlot()) ||
(source->IsStackSlot() && destination->IsRegister())) {

View File

@ -244,7 +244,9 @@ void LGapResolver::EmitSwap(int index) {
// Swap two general-purpose registers.
Register src = cgen_->ToRegister(source);
Register dst = cgen_->ToRegister(destination);
__ xchgq(dst, src);
__ movp(kScratchRegister, src);
__ movp(src, dst);
__ movp(dst, kScratchRegister);
} else if ((source->IsRegister() && destination->IsStackSlot()) ||
(source->IsStackSlot() && destination->IsRegister())) {

View File

@ -3341,7 +3341,9 @@ void CompareICStub::GenerateBooleans(MacroAssembler* masm) {
__ AssertSmi(eax);
__ mov(edx, FieldOperand(edx, Oddball::kToNumberOffset));
__ AssertSmi(edx);
__ xchg(eax, edx);
__ push(eax);
__ mov(eax, edx);
__ pop(edx);
}
__ sub(eax, edx);
__ Ret();

View File

@ -3291,7 +3291,9 @@ void CompareICStub::GenerateBooleans(MacroAssembler* masm) {
__ AssertSmi(rax);
__ movp(rdx, FieldOperand(rdx, Oddball::kToNumberOffset));
__ AssertSmi(rdx);
__ xchgp(rax, rdx);
__ pushq(rax);
__ movq(rax, rdx);
__ popq(rdx);
}
__ subp(rax, rdx);
__ Ret();