Fix intermixing of AVX/SSE instructions

- Add vmovdqu to the assembler
- Fix bugs in the macro assembler for instructions with immediates
- Fix codegen to use the AVX/SSE-dispatching macro instructions

Bug: v8:9499
Change-Id: Id9a521561ed5481eb617b2d97e4af933aac7a54e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1707577
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62792}
Deepti Gandluri, 2019-07-17 17:48:22 -07:00, committed by Commit Bot
commit 8ee7d962bd (parent 58d1e842eb)
5 changed files with 81 additions and 25 deletions
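Context for the change, as a minimal sketch (simplified; the concrete wrappers are in the macro-assembler hunk below): on Intel cores, interleaving VEX-encoded (AVX) and legacy-encoded (SSE) instructions that write XMM registers can incur costly state-transition penalties, so each macro-assembler SIMD helper picks exactly one encoding family at run time:

void TurboAssembler::Psllq(XMMRegister dst, byte imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);  // emit the VEX-encoded form
    vpsllq(dst, dst, imm8);            // 3-operand AVX: dst = dst << imm8
  } else {
    psllq(dst, imm8);                  // 2-operand legacy SSE form
  }
}

The hunks below add vmovdqu to the assembler so Movdqu can dispatch the same way, hand-write the shift-by-immediate wrappers, and switch codegen call sites from the lowercase SSE-only emitters to the capitalized dispatching helpers.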


@@ -4187,6 +4187,22 @@ void Assembler::vmovq(Register dst, XMMRegister src) {
   emit_sse_operand(src, dst);
 }
 
+void Assembler::vmovdqu(XMMRegister dst, Operand src) {
+  DCHECK(IsEnabled(AVX));
+  EnsureSpace ensure_space(this);
+  emit_vex_prefix(dst, xmm0, src, kL128, kF3, k0F, kWIG);
+  emit(0x6F);
+  emit_sse_operand(dst, src);
+}
+
+void Assembler::vmovdqu(Operand dst, XMMRegister src) {
+  DCHECK(IsEnabled(AVX));
+  EnsureSpace ensure_space(this);
+  emit_vex_prefix(src, xmm0, dst, kL128, kF3, k0F, kWIG);
+  emit(0x7F);
+  emit_sse_operand(src, dst);
+}
+
 void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
                        XMMRegister src2, SIMDPrefix pp, LeadingOpcode m,
                        VexW w) {
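For reference, a hypothetical call site and the bytes these two emitters produce (assuming registers below xmm8/r8, so the compact two-byte VEX prefix applies):

// Within CpuFeatureScope scope(&masm, AVX):
masm.vmovdqu(xmm1, Operand(rax, 0));  // C5 FA 6F 08  VEX.128.F3.0F.WIG 6F /r (load)
masm.vmovdqu(Operand(rax, 0), xmm1);  // C5 FA 7F 08  VEX.128.F3.0F.WIG 7F /r (store)

kF3 selects the F3 mandatory prefix in the VEX encoding, kL128 the 128-bit vector length, and 0x6F/0x7F are the load/store opcodes from the Intel SDM entry for VMOVDQU.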


@@ -1305,6 +1305,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   }
   void vmovsd(XMMRegister dst, Operand src) { vsd(0x10, dst, xmm0, src); }
   void vmovsd(Operand dst, XMMRegister src) { vsd(0x11, src, xmm0, dst); }
+  void vmovdqu(XMMRegister dst, Operand src);
+  void vmovdqu(Operand dst, XMMRegister src);
 
 #define AVX_SP_3(instr, opcode) \
   AVX_S_3(instr, opcode)        \


@@ -1771,6 +1771,42 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, int8_t imm8) {
   }
 }
 
+void TurboAssembler::Psllq(XMMRegister dst, byte imm8) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vpsllq(dst, dst, imm8);
+  } else {
+    psllq(dst, imm8);
+  }
+}
+
+void TurboAssembler::Psrlq(XMMRegister dst, byte imm8) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vpsrlq(dst, dst, imm8);
+  } else {
+    psrlq(dst, imm8);
+  }
+}
+
+void TurboAssembler::Pslld(XMMRegister dst, byte imm8) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vpslld(dst, dst, imm8);
+  } else {
+    pslld(dst, imm8);
+  }
+}
+
+void TurboAssembler::Psrld(XMMRegister dst, byte imm8) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vpsrld(dst, dst, imm8);
+  } else {
+    psrld(dst, imm8);
+  }
+}
+
 void TurboAssembler::Lzcntl(Register dst, Register src) {
   if (CpuFeatures::IsSupported(LZCNT)) {
     CpuFeatureScope scope(this, LZCNT);
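These four get explicit definitions rather than going through the generic AVX_OP template (they leave that list in the next hunk), presumably because the shift-by-immediate overloads were what the commit message calls "bugs in macro assembler for instructions with immediates". A hypothetical call and the bytes each path emits (register below xmm8 assumed):

tasm.Psrlq(xmm1, 33);
// AVX path: vpsrlq xmm1, xmm1, 33  ->  C5 F1 73 D1 21  (VEX.vvvv names the destination)
// SSE path: psrlq xmm1, 33         ->  66 0F 73 D1 21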


@@ -80,7 +80,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   template <typename Dst, typename... Args>
   struct AvxHelper {
     Assembler* assm;
-    // Call an method where the AVX version expects the dst argument to be
+    // Call a method where the AVX version expects the dst argument to be
     // duplicated.
     template <void (Assembler::*avx)(Dst, Dst, Args...),
               void (Assembler::*no_avx)(Dst, Args...)>
@@ -93,7 +93,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
       }
     }
 
-    // Call an method where the AVX version expects no duplicated dst argument.
+    // Call a method where the AVX version expects no duplicated dst argument.
     template <void (Assembler::*avx)(Dst, Args...),
               void (Assembler::*no_avx)(Dst, Args...)>
     void emit(Dst dst, Args... args) {
@@ -127,11 +127,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_OP(Movmskpd, movmskpd)
   AVX_OP(Movss, movss)
   AVX_OP(Movsd, movsd)
+  AVX_OP(Movdqu, movdqu)
   AVX_OP(Pcmpeqd, pcmpeqd)
-  AVX_OP(Pslld, pslld)
-  AVX_OP(Psllq, psllq)
-  AVX_OP(Psrld, psrld)
-  AVX_OP(Psrlq, psrlq)
   AVX_OP(Addss, addss)
   AVX_OP(Addsd, addsd)
   AVX_OP(Mulsd, mulsd)
@@ -370,6 +367,11 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   void Pinsrd(XMMRegister dst, Register src, int8_t imm8);
   void Pinsrd(XMMRegister dst, Operand src, int8_t imm8);
+  void Psllq(XMMRegister dst, byte imm8);
+  void Psrlq(XMMRegister dst, byte imm8);
+  void Pslld(XMMRegister dst, byte imm8);
+  void Psrld(XMMRegister dst, byte imm8);
+
   void CompareRoot(Register with, RootIndex index);
   void CompareRoot(Operand with, RootIndex index);
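Roughly what AVX_OP(Movdqu, movdqu) expands to (a simplified sketch; the real macro is shared by every entry in the list): a forwarding template that routes through AvxHelper's non-duplicated-dst emit() overload, since vmovdqu and movdqu take the same two operands:

template <typename Dst, typename... Args>
void Movdqu(Dst dst, Args... args) {
  AvxHelper<Dst, Args...>{this}
      .template emit<&Assembler::vmovdqu, &Assembler::movdqu>(dst, args...);
}

The four shift helpers leave this list because they now have the explicit definitions shown in the previous hunk.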


@@ -1318,16 +1318,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     case kSSEFloat32Abs: {
       // TODO(bmeurer): Use RIP relative 128-bit constants.
-      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
-      __ psrlq(kScratchDoubleReg, 33);
-      __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
+      __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ Psrlq(kScratchDoubleReg, 33);
+      __ Andps(i.OutputDoubleRegister(), kScratchDoubleReg);
       break;
     }
     case kSSEFloat32Neg: {
       // TODO(bmeurer): Use RIP relative 128-bit constants.
-      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
-      __ psllq(kScratchDoubleReg, 31);
-      __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
+      __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ Psllq(kScratchDoubleReg, 31);
+      __ Xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
       break;
     }
     case kSSEFloat32Sqrt:
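Why those shift counts work (the scalar result lives in the low lane, and Pcmpeqd first sets every bit of the scratch register):

// Float32Abs: ~0 >> 33 (per 64-bit lane) == 0x00000000'7FFFFFFF;
//             Andps clears bit 31, the sign bit of the low single.
// Float32Neg: ~0 << 31 (per 64-bit lane) == 0xFFFFFFFF'80000000;
//             Xorps flips bit 31, negating the low single.
// The kSSEFloat64 cases below shift by 1 and 63 to hit bit 63, the
// double's sign bit, in exactly the same way.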
@@ -1528,16 +1528,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kSSEFloat64Abs: {
       // TODO(bmeurer): Use RIP relative 128-bit constants.
-      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
-      __ psrlq(kScratchDoubleReg, 1);
-      __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
+      __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ Psrlq(kScratchDoubleReg, 1);
+      __ Andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
       break;
     }
     case kSSEFloat64Neg: {
       // TODO(bmeurer): Use RIP relative 128-bit constants.
-      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
-      __ psllq(kScratchDoubleReg, 63);
-      __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
+      __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ Psllq(kScratchDoubleReg, 63);
+      __ Xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
       break;
     }
     case kSSEFloat64Sqrt:
@@ -2021,11 +2021,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kX64Movss:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
       if (instr->HasOutput()) {
-        __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
+        __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
       } else {
         size_t index = 0;
         Operand operand = i.MemoryOperand(&index);
-        __ movss(operand, i.InputDoubleRegister(index));
+        __ Movss(operand, i.InputDoubleRegister(index));
       }
       break;
     case kX64Movsd: {
@@ -2054,11 +2054,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       CpuFeatureScope sse_scope(tasm(), SSSE3);
       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
       if (instr->HasOutput()) {
-        __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
+        __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
       } else {
         size_t index = 0;
         Operand operand = i.MemoryOperand(&index);
-        __ movdqu(operand, i.InputSimd128Register(index));
+        __ Movdqu(operand, i.InputSimd128Register(index));
       }
       break;
     }
@@ -2080,7 +2080,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       if (instr->InputAt(0)->IsRegister()) {
         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
       } else {
-        __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
+        __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
       }
       break;
     case kX64BitcastLD:
@@ -4076,7 +4076,7 @@ void CodeGenerator::AssembleConstructFrame() {
       int slot_idx = 0;
       for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
         if (!((1 << i) & saves_fp)) continue;
-        __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
+        __ Movdqu(Operand(rsp, kQuadWordSize * slot_idx),
                   XMMRegister::from_code(i));
         slot_idx++;
       }
@@ -4118,7 +4118,7 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
       int slot_idx = 0;
       for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
         if (!((1 << i) & saves_fp)) continue;
-        __ movdqu(XMMRegister::from_code(i),
+        __ Movdqu(XMMRegister::from_code(i),
                   Operand(rsp, kQuadWordSize * slot_idx));
         slot_idx++;
       }
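A worked example of the slot assignment in the two loops above, assuming kQuadWordSize is the 16-byte width of one 128-bit save slot: the slot index counts only the registers actually saved, so with saves_fp == 0b101 (xmm0 and xmm2 live across the frame):

// AssembleConstructFrame:              AssembleReturn:
//   Movdqu [rsp +  0], xmm0              Movdqu xmm0, [rsp +  0]
//   Movdqu [rsp + 16], xmm2              Movdqu xmm2, [rsp + 16]

Because both loops walk the register set in the same order, the save and restore slots pair up without any stored mapping.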