[Turbofan] Support 128 bit moves/swaps for x64.

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2139513002
Cr-Commit-Position: refs/heads/master@{#37654}
Author: bbudge
Date: 2016-07-11 11:18:17 -07:00
Committed by: Commit bot
Parent: 8474f24450
Commit: 56d013d481
7 changed files with 139 additions and 20 deletions

src/compiler/x64/code-generator-x64.cc

@@ -2358,18 +2358,34 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
     } else {
       DCHECK(destination->IsFPStackSlot());
       Operand dst = g.ToOperand(destination);
-      __ Movsd(dst, src);
+      MachineRepresentation rep =
+          LocationOperand::cast(source)->representation();
+      if (rep != MachineRepresentation::kSimd128) {
+        __ Movsd(dst, src);
+      } else {
+        __ Movups(dst, src);
+      }
     }
   } else if (source->IsFPStackSlot()) {
     DCHECK(destination->IsFPRegister() || destination->IsFPStackSlot());
     Operand src = g.ToOperand(source);
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
     if (destination->IsFPRegister()) {
       XMMRegister dst = g.ToDoubleRegister(destination);
-      __ Movsd(dst, src);
+      if (rep != MachineRepresentation::kSimd128) {
+        __ Movsd(dst, src);
+      } else {
+        __ Movups(dst, src);
+      }
     } else {
       Operand dst = g.ToOperand(destination);
-      __ Movsd(kScratchDoubleReg, src);
-      __ Movsd(dst, kScratchDoubleReg);
+      if (rep != MachineRepresentation::kSimd128) {
+        __ Movsd(kScratchDoubleReg, src);
+        __ Movsd(dst, kScratchDoubleReg);
+      } else {
+        __ Movups(kScratchDoubleReg, src);
+        __ Movups(dst, kScratchDoubleReg);
+      }
     }
   } else {
     UNREACHABLE();
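The AssembleMove hunk above now reads the source operand's MachineRepresentation and picks the 16-byte unaligned Movups for kSimd128 values, while all other FP values keep the 8-byte Movsd. A minimal standalone sketch of that selection (plain C++, not V8 code; the enum is reduced to a few representative cases):

#include <cstdio>

// Reduced stand-in for v8::internal::MachineRepresentation.
enum class MachineRepresentation { kFloat32, kFloat64, kSimd128 };

// Mirrors the new dispatch: 128-bit SIMD slots need a full 16-byte move
// (movups); everything else keeps the 8-byte movsd used before this CL.
const char* FPMoveInstruction(MachineRepresentation rep) {
  return rep == MachineRepresentation::kSimd128 ? "movups" : "movsd";
}

int main() {
  std::printf("%s\n", FPMoveInstruction(MachineRepresentation::kFloat64));   // movsd
  std::printf("%s\n", FPMoveInstruction(MachineRepresentation::kSimd128));   // movups
  return 0;
}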
@@ -2401,31 +2417,57 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
   } else if ((source->IsStackSlot() && destination->IsStackSlot()) ||
              (source->IsFPStackSlot() && destination->IsFPStackSlot())) {
     // Memory-memory.
-    Register tmp = kScratchRegister;
     Operand src = g.ToOperand(source);
     Operand dst = g.ToOperand(destination);
-    __ movq(tmp, dst);
-    __ pushq(src);
-    frame_access_state()->IncreaseSPDelta(1);
-    src = g.ToOperand(source);
-    __ movq(src, tmp);
-    frame_access_state()->IncreaseSPDelta(-1);
-    dst = g.ToOperand(destination);
-    __ popq(dst);
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
+    if (rep != MachineRepresentation::kSimd128) {
+      Register tmp = kScratchRegister;
+      __ movq(tmp, dst);
+      __ pushq(src);
+      frame_access_state()->IncreaseSPDelta(1);
+      src = g.ToOperand(source);
+      __ movq(src, tmp);
+      frame_access_state()->IncreaseSPDelta(-1);
+      dst = g.ToOperand(destination);
+      __ popq(dst);
+    } else {
+      // Use the XOR trick to swap without a temporary.
+      __ Movups(kScratchDoubleReg, src);
+      __ Xorps(kScratchDoubleReg, dst);  // scratch contains src ^ dst.
+      __ Movups(src, kScratchDoubleReg);
+      __ Xorps(kScratchDoubleReg, dst);  // scratch contains src.
+      __ Movups(dst, kScratchDoubleReg);
+      __ Xorps(kScratchDoubleReg, src);  // scratch contains dst.
+      __ Movups(src, kScratchDoubleReg);
+    }
   } else if (source->IsFPRegister() && destination->IsFPRegister()) {
     // XMM register-register swap.
     XMMRegister src = g.ToDoubleRegister(source);
     XMMRegister dst = g.ToDoubleRegister(destination);
-    __ Movapd(kScratchDoubleReg, src);
-    __ Movapd(src, dst);
-    __ Movapd(dst, kScratchDoubleReg);
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
+    if (rep != MachineRepresentation::kSimd128) {
+      __ Movapd(kScratchDoubleReg, src);
+      __ Movapd(src, dst);
+      __ Movapd(dst, kScratchDoubleReg);
+    } else {
+      __ Movups(kScratchDoubleReg, src);
+      __ Movups(src, dst);
+      __ Movups(dst, kScratchDoubleReg);
+    }
   } else if (source->IsFPRegister() && destination->IsFPStackSlot()) {
     // XMM register-memory swap.
     XMMRegister src = g.ToDoubleRegister(source);
     Operand dst = g.ToOperand(destination);
-    __ Movsd(kScratchDoubleReg, src);
-    __ Movsd(src, dst);
-    __ Movsd(dst, kScratchDoubleReg);
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
+    if (rep != MachineRepresentation::kSimd128) {
+      __ Movsd(kScratchDoubleReg, src);
+      __ Movsd(src, dst);
+      __ Movsd(dst, kScratchDoubleReg);
+    } else {
+      __ Movups(kScratchDoubleReg, src);
+      __ Movups(src, dst);
+      __ Movups(dst, kScratchDoubleReg);
+    }
   } else {
     // No other combinations are possible.
     UNREACHABLE();
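The 64-bit memory-memory swap goes through kScratchRegister plus a push/pop; for 128-bit slots the new path instead XOR-swaps the two slots in place through kScratchDoubleReg, as the comment in the hunk notes. A value-level sketch of that seven-instruction sequence (plain C++, not V8 code; Lane128 stands in for a 16-byte stack slot, and each statement mirrors one emitted Movups/Xorps):

#include <array>
#include <cassert>
#include <cstdint>

using Lane128 = std::array<uint64_t, 2>;

Lane128 Xor(Lane128 a, const Lane128& b) {
  a[0] ^= b[0];
  a[1] ^= b[1];
  return a;
}

void XorSwap128(Lane128& src, Lane128& dst) {
  Lane128 scratch = src;        // Movups(kScratchDoubleReg, src)
  scratch = Xor(scratch, dst);  // Xorps(kScratchDoubleReg, dst)  -> src ^ dst
  src = scratch;                // Movups(src, kScratchDoubleReg)
  scratch = Xor(scratch, dst);  // Xorps(kScratchDoubleReg, dst)  -> original src
  dst = scratch;                // Movups(dst, kScratchDoubleReg)
  scratch = Xor(scratch, src);  // Xorps(kScratchDoubleReg, src)  -> original dst
  src = scratch;                // Movups(src, kScratchDoubleReg)
}

int main() {
  Lane128 a = {1, 2}, b = {3, 4};
  XorSwap128(a, b);
  assert(a[0] == 3 && a[1] == 4 && b[0] == 1 && b[1] == 2);
  return 0;
}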

src/x64/assembler-x64.cc

@@ -3898,7 +3898,6 @@ void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
   emit_sse_operand(dst, src2);
 }
 
-
 void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
                     const Operand& src2) {
   DCHECK(IsEnabled(AVX));
@@ -3908,6 +3907,14 @@ void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
   emit_sse_operand(dst, src2);
 }
 
+void Assembler::vps(byte op, const Operand& dst, XMMRegister src1,
+                    XMMRegister src2) {
+  DCHECK(IsEnabled(AVX));
+  EnsureSpace ensure_space(this);
+  emit_vex_prefix(src2, src1, dst, kL128, kNone, k0F, kWIG);
+  emit(op);
+  emit_sse_operand(src2, dst);
+}
+
 void Assembler::vpd(byte op, XMMRegister dst, XMMRegister src1,
                     XMMRegister src2) {

src/x64/assembler-x64.h

@@ -1531,6 +1531,13 @@ class Assembler : public AssemblerBase {
   void vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
   void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
+  void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
+  void vmovups(XMMRegister dst, const Operand& src) {
+    vps(0x10, dst, xmm0, src);
+  }
+  void vmovups(const Operand& dst, XMMRegister src) {
+    vps(0x11, dst, xmm0, src);
+  }
   void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
   void vmovmskpd(Register dst, XMMRegister src) {
     XMMRegister idst = {dst.code()};
@@ -1539,6 +1546,7 @@ class Assembler : public AssemblerBase {
   void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
   void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
+  void vps(byte op, const Operand& dst, XMMRegister src1, XMMRegister src2);
   void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
   void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
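In the SSE/AVX opcode map, 0F 10 is the MOVUPS/VMOVUPS load form (xmm <- xmm/m128) and 0F 11 is the store form (m128 <- xmm), so the register-destination vmovups overloads above go through vps with opcode 0x10 and only the memory-destination overload uses 0x11, which is also why that overload needs the new vps variant taking an Operand destination. A small standalone sketch of that mapping (plain C++, not V8 code):

#include <cstdio>

enum class Dst { kXmmRegister, kMemory };

// 0F 10 loads into an xmm register; 0F 11 stores an xmm register to memory.
// Only a memory destination needs the store opcode.
int MovupsOpcode(Dst dst) { return dst == Dst::kMemory ? 0x11 : 0x10; }

int main() {
  std::printf("reg <- reg/mem: 0x%02x\n", MovupsOpcode(Dst::kXmmRegister));  // 0x10
  std::printf("mem <- reg:     0x%02x\n", MovupsOpcode(Dst::kMemory));       // 0x11
  return 0;
}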

src/x64/disasm-x64.cc

@@ -1227,6 +1227,15 @@ int DisassemblerX64::AVXInstruction(byte* data) {
     int mod, regop, rm, vvvv = vex_vreg();
     get_modrm(*current, &mod, &regop, &rm);
     switch (opcode) {
+      case 0x10:
+        AppendToBuffer("vmovups %s,", NameOfXMMRegister(regop));
+        current += PrintRightXMMOperand(current);
+        break;
+      case 0x11:
+        AppendToBuffer("vmovups ");
+        current += PrintRightXMMOperand(current);
+        AppendToBuffer(",%s", NameOfXMMRegister(regop));
+        break;
       case 0x28:
         AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop));
         current += PrintRightXMMOperand(current);

src/x64/macro-assembler-x64.cc

@@ -2718,6 +2718,32 @@ void MacroAssembler::Movaps(XMMRegister dst, XMMRegister src) {
   }
 }
 
+void MacroAssembler::Movups(XMMRegister dst, XMMRegister src) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vmovups(dst, src);
+  } else {
+    movups(dst, src);
+  }
+}
+
+void MacroAssembler::Movups(XMMRegister dst, const Operand& src) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vmovups(dst, src);
+  } else {
+    movups(dst, src);
+  }
+}
+
+void MacroAssembler::Movups(const Operand& dst, XMMRegister src) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vmovups(dst, src);
+  } else {
+    movups(dst, src);
+  }
+}
 
 void MacroAssembler::Movapd(XMMRegister dst, XMMRegister src) {
   if (CpuFeatures::IsSupported(AVX)) {
@@ -2848,6 +2874,23 @@ void MacroAssembler::Movmskpd(Register dst, XMMRegister src) {
   }
 }
 
+void MacroAssembler::Xorps(XMMRegister dst, XMMRegister src) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vxorps(dst, kScratchDoubleReg, src);
+  } else {
+    xorps(dst, src);
+  }
+}
+
+void MacroAssembler::Xorps(XMMRegister dst, const Operand& src) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vxorps(dst, kScratchDoubleReg, src);
+  } else {
+    xorps(dst, src);
+  }
+}
 
 void MacroAssembler::Roundss(XMMRegister dst, XMMRegister src,
                              RoundingMode mode) {
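Note that the AVX path of Xorps passes kScratchDoubleReg as the first vxorps source: the VEX-encoded xorps is three-operand (dst = src1 ^ src2), so this wrapper only behaves like the two-operand SSE xorps (dst ^= src) when it is called with dst == kScratchDoubleReg, which is exactly how the 128-bit swap in AssembleSwap uses it. A small standalone illustration of that constraint (plain C++, not V8 code):

#include <cstdint>
#include <cstdio>

// Two-operand SSE xorps: dst ^= src.
uint64_t SseXorps(uint64_t dst, uint64_t src) { return dst ^ src; }

// What the wrapper's AVX path computes: dst = kScratchDoubleReg ^ src,
// regardless of dst's previous value.
uint64_t WrapperAvxPath(uint64_t scratch, uint64_t /*dst*/, uint64_t src) {
  return scratch ^ src;
}

int main() {
  uint64_t scratch = 0x1234, other = 0xFFFF, src = 0xABCD;
  // dst == kScratchDoubleReg: both paths agree (the AssembleSwap usage).
  std::printf("%d\n", SseXorps(scratch, src) == WrapperAvxPath(scratch, scratch, src));  // 1
  // dst != kScratchDoubleReg: the AVX path would ignore dst's old value.
  std::printf("%d\n", SseXorps(other, src) == WrapperAvxPath(scratch, other, src));      // 0
  return 0;
}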

src/x64/macro-assembler-x64.h

@@ -983,9 +983,15 @@ class MacroAssembler: public Assembler {
   void Movq(Register dst, XMMRegister src);
   void Movaps(XMMRegister dst, XMMRegister src);
+  void Movups(XMMRegister dst, XMMRegister src);
+  void Movups(XMMRegister dst, const Operand& src);
+  void Movups(const Operand& dst, XMMRegister src);
   void Movapd(XMMRegister dst, XMMRegister src);
   void Movmskpd(Register dst, XMMRegister src);
+  void Xorps(XMMRegister dst, XMMRegister src);
+  void Xorps(XMMRegister dst, const Operand& src);
   void Roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
   void Roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
   void Sqrtsd(XMMRegister dst, XMMRegister src);

test/cctest/test-disasm-x64.cc

@@ -599,6 +599,10 @@ TEST(DisasmX64) {
     __ vmovapd(xmm7, xmm0);
     __ vmovmskpd(r9, xmm4);
+    __ vmovups(xmm5, xmm1);
+    __ vmovups(xmm5, Operand(rdx, 4));
+    __ vmovups(Operand(rdx, 4), xmm5);
     __ vandps(xmm0, xmm9, xmm2);
     __ vandps(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));
     __ vxorps(xmm0, xmm1, xmm9);