[Turbofan] Support 128-bit moves/swaps for x64.

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2139513002
Cr-Commit-Position: refs/heads/master@{#37654}
This commit is contained in:
bbudge 2016-07-11 11:18:17 -07:00 committed by Commit bot
parent 8474f24450
commit 56d013d481
7 changed files with 139 additions and 20 deletions

View File

@ -2358,18 +2358,34 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
} else {
DCHECK(destination->IsFPStackSlot());
Operand dst = g.ToOperand(destination);
MachineRepresentation rep =
LocationOperand::cast(source)->representation();
if (rep != MachineRepresentation::kSimd128) {
__ Movsd(dst, src);
} else {
__ Movups(dst, src);
}
}
} else if (source->IsFPStackSlot()) {
DCHECK(destination->IsFPRegister() || destination->IsFPStackSlot());
Operand src = g.ToOperand(source);
MachineRepresentation rep = LocationOperand::cast(source)->representation();
if (destination->IsFPRegister()) {
XMMRegister dst = g.ToDoubleRegister(destination);
if (rep != MachineRepresentation::kSimd128) {
__ Movsd(dst, src);
} else {
__ Movups(dst, src);
}
} else {
Operand dst = g.ToOperand(destination);
if (rep != MachineRepresentation::kSimd128) {
__ Movsd(kScratchDoubleReg, src);
__ Movsd(dst, kScratchDoubleReg);
} else {
__ Movups(kScratchDoubleReg, src);
__ Movups(dst, kScratchDoubleReg);
}
}
} else {
UNREACHABLE();
@ -2401,9 +2417,11 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
} else if ((source->IsStackSlot() && destination->IsStackSlot()) ||
(source->IsFPStackSlot() && destination->IsFPStackSlot())) {
// Memory-memory.
Register tmp = kScratchRegister;
Operand src = g.ToOperand(source);
Operand dst = g.ToOperand(destination);
MachineRepresentation rep = LocationOperand::cast(source)->representation();
if (rep != MachineRepresentation::kSimd128) {
Register tmp = kScratchRegister;
__ movq(tmp, dst);
__ pushq(src);
frame_access_state()->IncreaseSPDelta(1);
@ -2412,20 +2430,44 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
frame_access_state()->IncreaseSPDelta(-1);
dst = g.ToOperand(destination);
__ popq(dst);
} else {
// Use the XOR trick to swap without a temporary.
__ Movups(kScratchDoubleReg, src);
__ Xorps(kScratchDoubleReg, dst); // scratch contains src ^ dst.
__ Movups(src, kScratchDoubleReg);
__ Xorps(kScratchDoubleReg, dst); // scratch contains src.
__ Movups(dst, kScratchDoubleReg);
__ Xorps(kScratchDoubleReg, src); // scratch contains dst.
__ Movups(src, kScratchDoubleReg);
}
} else if (source->IsFPRegister() && destination->IsFPRegister()) {
// XMM register-register swap.
XMMRegister src = g.ToDoubleRegister(source);
XMMRegister dst = g.ToDoubleRegister(destination);
MachineRepresentation rep = LocationOperand::cast(source)->representation();
if (rep != MachineRepresentation::kSimd128) {
__ Movapd(kScratchDoubleReg, src);
__ Movapd(src, dst);
__ Movapd(dst, kScratchDoubleReg);
} else {
__ Movups(kScratchDoubleReg, src);
__ Movups(src, dst);
__ Movups(dst, kScratchDoubleReg);
}
} else if (source->IsFPRegister() && destination->IsFPStackSlot()) {
// XMM register-memory swap.
XMMRegister src = g.ToDoubleRegister(source);
Operand dst = g.ToOperand(destination);
MachineRepresentation rep = LocationOperand::cast(source)->representation();
if (rep != MachineRepresentation::kSimd128) {
__ Movsd(kScratchDoubleReg, src);
__ Movsd(src, dst);
__ Movsd(dst, kScratchDoubleReg);
} else {
__ Movups(kScratchDoubleReg, src);
__ Movups(src, dst);
__ Movups(dst, kScratchDoubleReg);
}
} else {
// No other combinations are possible.
UNREACHABLE();

View File

@ -3898,7 +3898,6 @@ void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
emit_sse_operand(dst, src2);
}
void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
const Operand& src2) {
DCHECK(IsEnabled(AVX));
@ -3908,6 +3907,14 @@ void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
emit_sse_operand(dst, src2);
}
// VEX-encoded instruction emitter for the form whose destination is a memory
// operand. Emits, in order: the VEX prefix (built from |src2|, |src1| and
// |dst| with the kL128 / kNone / k0F / kWIG settings), the opcode byte |op|,
// and the operand bytes with |src2| as the register and |dst| as the memory
// operand. AVX must be enabled (checked with a DCHECK).
void Assembler::vps(byte op, const Operand& dst, XMMRegister src1,
XMMRegister src2) {
DCHECK(IsEnabled(AVX));
// Reserve buffer space before emitting any bytes.
EnsureSpace ensure_space(this);
emit_vex_prefix(src2, src1, dst, kL128, kNone, k0F, kWIG);
emit(op);
// The register goes first and the memory operand second, mirroring the
// prefix call above.
emit_sse_operand(src2, dst);
}
void Assembler::vpd(byte op, XMMRegister dst, XMMRegister src1,
XMMRegister src2) {

View File

@ -1531,6 +1531,13 @@ class Assembler : public AssemblerBase {
void vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
// Register-to-register vmovaps: forwards to vps with opcode 0x28. xmm0 is
// passed in the src1 slot because the move has only one source register.
void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
// Register-to-register vmovups: forwards to vps with opcode 0x10 (the
// register-destination form). xmm0 is passed in the src1 slot.
void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
// Load form of vmovups: reads the memory operand |src| into register |dst|.
// Opcode 0x10 is the register-destination encoding; 0x11 is the
// memory-destination (store) encoding and would write |dst| out to |src|
// instead of loading it. xmm0 is passed in the src1 slot because the move has
// only one source.
void vmovups(XMMRegister dst, const Operand& src) {
  vps(0x10, dst, xmm0, src);
}
// Store form of vmovups: writes register |src| to the memory operand |dst|.
// Forwards to the memory-destination vps overload with opcode 0x11; xmm0 is
// passed in the src1 slot.
void vmovups(const Operand& dst, XMMRegister src) {
vps(0x11, dst, xmm0, src);
}
// Register-to-register vmovapd: forwards to vpd with opcode 0x28. xmm0 is
// passed in the src1 slot.
void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
void vmovmskpd(Register dst, XMMRegister src) {
XMMRegister idst = {dst.code()};
@ -1539,6 +1546,7 @@ class Assembler : public AssemblerBase {
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
void vps(byte op, const Operand& dst, XMMRegister src1, XMMRegister src2);
void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

View File

@ -1227,6 +1227,15 @@ int DisassemblerX64::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x10:
AppendToBuffer("vmovups %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x11:
AppendToBuffer("vmovups ");
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
break;
case 0x28:
AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);

View File

@ -2718,6 +2718,32 @@ void MacroAssembler::Movaps(XMMRegister dst, XMMRegister src) {
}
}
// Register-to-register Movups. Emits vmovups when the CPU supports AVX and
// falls back to movups otherwise.
void MacroAssembler::Movups(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    movups(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vmovups(dst, src);
}
// Memory-to-register Movups. Emits vmovups when the CPU supports AVX and
// falls back to movups otherwise.
void MacroAssembler::Movups(XMMRegister dst, const Operand& src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    movups(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vmovups(dst, src);
}
// Register-to-memory Movups. Emits vmovups when the CPU supports AVX and
// falls back to movups otherwise.
void MacroAssembler::Movups(const Operand& dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    movups(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vmovups(dst, src);
}
void MacroAssembler::Movapd(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
@ -2848,6 +2874,23 @@ void MacroAssembler::Movmskpd(Register dst, XMMRegister src) {
}
}
// XORs register |src| into |dst| (dst ^= src). Uses the VEX-encoded vxorps
// when the CPU supports AVX and the two-operand xorps otherwise.
void MacroAssembler::Xorps(XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    // vxorps takes two sources (dst = src1 ^ src2). |dst| must be the first
    // source so the result matches the xorps path for every |dst|, not only
    // when |dst| happens to be kScratchDoubleReg.
    vxorps(dst, dst, src);
  } else {
    xorps(dst, src);
  }
}
// XORs the memory operand |src| into register |dst| (dst ^= src). Uses the
// VEX-encoded vxorps when the CPU supports AVX and the two-operand xorps
// otherwise.
void MacroAssembler::Xorps(XMMRegister dst, const Operand& src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    // vxorps takes two sources (dst = src1 ^ src2). |dst| must be the first
    // source so the result matches the xorps path for every |dst|, not only
    // when |dst| happens to be kScratchDoubleReg.
    vxorps(dst, dst, src);
  } else {
    xorps(dst, src);
  }
}
void MacroAssembler::Roundss(XMMRegister dst, XMMRegister src,
RoundingMode mode) {

View File

@ -983,9 +983,15 @@ class MacroAssembler: public Assembler {
void Movq(Register dst, XMMRegister src);
void Movaps(XMMRegister dst, XMMRegister src);
void Movups(XMMRegister dst, XMMRegister src);
void Movups(XMMRegister dst, const Operand& src);
void Movups(const Operand& dst, XMMRegister src);
void Movapd(XMMRegister dst, XMMRegister src);
void Movmskpd(Register dst, XMMRegister src);
void Xorps(XMMRegister dst, XMMRegister src);
void Xorps(XMMRegister dst, const Operand& src);
void Roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
void Roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void Sqrtsd(XMMRegister dst, XMMRegister src);

View File

@ -599,6 +599,10 @@ TEST(DisasmX64) {
__ vmovapd(xmm7, xmm0);
__ vmovmskpd(r9, xmm4);
__ vmovups(xmm5, xmm1);
__ vmovups(xmm5, Operand(rdx, 4));
__ vmovups(Operand(rdx, 4), xmm5);
__ vandps(xmm0, xmm9, xmm2);
__ vandps(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vxorps(xmm0, xmm1, xmm9);