[turbofan] avoid xchg instruction on Intel
On Intel, xchg stalls the pipeline. We use xchg to implement swap moves. In a separate exploration, the presence of xchg in a very hot loop, due to a change in register allocation, lead to over 20% regression. Simply changing that instruction with push/mov/pop (almost) eliminated the regression. In light of that, I removed uses of xchg. This leads to more instructions, though. That is particularly problematic for long cycles, which, today, we translate to successions of swaps. I plan to address this cycle issue in a separate change. For now, the goal is to unblock the initial work that lead here. Review URL: https://codereview.chromium.org/1580233003 Cr-Commit-Position: refs/heads/master@{#33282}
This commit is contained in:
parent
1357357647
commit
7440edae1d
@ -1616,10 +1616,19 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
|
||||
// Register-register.
|
||||
Register src = g.ToRegister(source);
|
||||
Register dst = g.ToRegister(destination);
|
||||
__ xchg(dst, src);
|
||||
__ push(src);
|
||||
__ mov(src, dst);
|
||||
__ pop(dst);
|
||||
} else if (source->IsRegister() && destination->IsStackSlot()) {
|
||||
// Register-memory.
|
||||
__ xchg(g.ToRegister(source), g.ToOperand(destination));
|
||||
Register src = g.ToRegister(source);
|
||||
__ push(src);
|
||||
frame_access_state()->IncreaseSPDelta(1);
|
||||
Operand dst = g.ToOperand(destination);
|
||||
__ mov(src, dst);
|
||||
frame_access_state()->IncreaseSPDelta(-1);
|
||||
dst = g.ToOperand(destination);
|
||||
__ pop(dst);
|
||||
} else if (source->IsStackSlot() && destination->IsStackSlot()) {
|
||||
// Memory-memory.
|
||||
Operand dst1 = g.ToOperand(destination);
|
||||
|
@ -2046,11 +2046,20 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
|
||||
// combinations are possible.
|
||||
if (source->IsRegister() && destination->IsRegister()) {
|
||||
// Register-register.
|
||||
__ xchgq(g.ToRegister(source), g.ToRegister(destination));
|
||||
Register src = g.ToRegister(source);
|
||||
Register dst = g.ToRegister(destination);
|
||||
__ movq(kScratchRegister, src);
|
||||
__ movq(src, dst);
|
||||
__ movq(dst, kScratchRegister);
|
||||
} else if (source->IsRegister() && destination->IsStackSlot()) {
|
||||
Register src = g.ToRegister(source);
|
||||
__ pushq(src);
|
||||
frame_access_state()->IncreaseSPDelta(1);
|
||||
Operand dst = g.ToOperand(destination);
|
||||
__ xchgq(src, dst);
|
||||
__ movq(src, dst);
|
||||
frame_access_state()->IncreaseSPDelta(-1);
|
||||
dst = g.ToOperand(destination);
|
||||
__ popq(dst);
|
||||
} else if ((source->IsStackSlot() && destination->IsStackSlot()) ||
|
||||
(source->IsDoubleStackSlot() &&
|
||||
destination->IsDoubleStackSlot())) {
|
||||
@ -2059,8 +2068,13 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
|
||||
Operand src = g.ToOperand(source);
|
||||
Operand dst = g.ToOperand(destination);
|
||||
__ movq(tmp, dst);
|
||||
__ xchgq(tmp, src);
|
||||
__ movq(dst, tmp);
|
||||
__ pushq(src);
|
||||
frame_access_state()->IncreaseSPDelta(1);
|
||||
src = g.ToOperand(source);
|
||||
__ movq(src, tmp);
|
||||
frame_access_state()->IncreaseSPDelta(-1);
|
||||
dst = g.ToOperand(destination);
|
||||
__ popq(dst);
|
||||
} else if (source->IsDoubleRegister() && destination->IsDoubleRegister()) {
|
||||
// XMM register-register swap. We rely on having xmm0
|
||||
// available as a fixed scratch register.
|
||||
|
@ -369,7 +369,9 @@ void LGapResolver::EmitSwap(int index) {
|
||||
// Register-register.
|
||||
Register src = cgen_->ToRegister(source);
|
||||
Register dst = cgen_->ToRegister(destination);
|
||||
__ xchg(dst, src);
|
||||
__ push(src);
|
||||
__ mov(src, dst);
|
||||
__ pop(dst);
|
||||
|
||||
} else if ((source->IsRegister() && destination->IsStackSlot()) ||
|
||||
(source->IsStackSlot() && destination->IsRegister())) {
|
||||
|
@ -244,7 +244,9 @@ void LGapResolver::EmitSwap(int index) {
|
||||
// Swap two general-purpose registers.
|
||||
Register src = cgen_->ToRegister(source);
|
||||
Register dst = cgen_->ToRegister(destination);
|
||||
__ xchgq(dst, src);
|
||||
__ movp(kScratchRegister, src);
|
||||
__ movp(src, dst);
|
||||
__ movp(dst, kScratchRegister);
|
||||
|
||||
} else if ((source->IsRegister() && destination->IsStackSlot()) ||
|
||||
(source->IsStackSlot() && destination->IsRegister())) {
|
||||
|
@ -3341,7 +3341,9 @@ void CompareICStub::GenerateBooleans(MacroAssembler* masm) {
|
||||
__ AssertSmi(eax);
|
||||
__ mov(edx, FieldOperand(edx, Oddball::kToNumberOffset));
|
||||
__ AssertSmi(edx);
|
||||
__ xchg(eax, edx);
|
||||
__ push(eax);
|
||||
__ mov(eax, edx);
|
||||
__ pop(edx);
|
||||
}
|
||||
__ sub(eax, edx);
|
||||
__ Ret();
|
||||
|
@ -3291,7 +3291,9 @@ void CompareICStub::GenerateBooleans(MacroAssembler* masm) {
|
||||
__ AssertSmi(rax);
|
||||
__ movp(rdx, FieldOperand(rdx, Oddball::kToNumberOffset));
|
||||
__ AssertSmi(rdx);
|
||||
__ xchgp(rax, rdx);
|
||||
__ pushq(rax);
|
||||
__ movq(rax, rdx);
|
||||
__ popq(rdx);
|
||||
}
|
||||
__ subp(rax, rdx);
|
||||
__ Ret();
|
||||
|
Loading…
Reference in New Issue
Block a user