Fix intermixing of AVX/SSE instructions
- Add vmovdqu to the assembler
- Fix bugs in macro assembler for instructions with immediates
- Fix codegen

Bug: v8:9499
Change-Id: Id9a521561ed5481eb617b2d97e4af933aac7a54e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1707577
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62792}
This commit is contained in:
parent
58d1e842eb
commit
8ee7d962bd
@ -4187,6 +4187,22 @@ void Assembler::vmovq(Register dst, XMMRegister src) {
|
||||
emit_sse_operand(src, dst);
|
||||
}
|
||||
|
||||
// Emits the VEX-encoded VMOVDQU load form: moves 128 bits of unaligned data
// from memory (src) into an XMM register (dst). Using the AVX encoding
// instead of SSE movdqu avoids mixing legacy-SSE and VEX instructions
// (see the AVX/SSE transition-penalty issue this change addresses).
void Assembler::vmovdqu(XMMRegister dst, Operand src) {
  DCHECK(IsEnabled(AVX));  // Caller must guarantee AVX (e.g. CpuFeatureScope).
  EnsureSpace ensure_space(this);
  // VEX.128.F3.0F.WIG 6F /r — dst goes in the VEX reg field, src is the r/m
  // operand; xmm0 fills the unused vvvv field.
  emit_vex_prefix(dst, xmm0, src, kL128, kF3, k0F, kWIG);
  emit(0x6F);
  emit_sse_operand(dst, src);
}
|
||||
|
||||
// Emits the VEX-encoded VMOVDQU store form: moves 128 bits of unaligned data
// from an XMM register (src) to memory (dst).
//
// Parameter names were swapped relative to the header declaration
// (vmovdqu(Operand dst, XMMRegister src)): the memory operand is the store
// destination, not the source. Renamed here to match the declaration; the
// emitted encoding is unchanged (the XMM register is still placed in the VEX
// reg field and the memory operand in r/m).
void Assembler::vmovdqu(Operand dst, XMMRegister src) {
  DCHECK(IsEnabled(AVX));  // Caller must guarantee AVX (e.g. CpuFeatureScope).
  EnsureSpace ensure_space(this);
  // VEX.128.F3.0F.WIG 7F /r — src goes in the VEX reg field, dst is the r/m
  // operand; xmm0 fills the unused vvvv field.
  emit_vex_prefix(src, xmm0, dst, kL128, kF3, k0F, kWIG);
  emit(0x7F);
  emit_sse_operand(src, dst);
}
|
||||
|
||||
void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
|
||||
XMMRegister src2, SIMDPrefix pp, LeadingOpcode m,
|
||||
VexW w) {
|
||||
|
@ -1305,6 +1305,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
}
|
||||
void vmovsd(XMMRegister dst, Operand src) { vsd(0x10, dst, xmm0, src); }
|
||||
void vmovsd(Operand dst, XMMRegister src) { vsd(0x11, src, xmm0, dst); }
|
||||
void vmovdqu(XMMRegister dst, Operand src);
|
||||
void vmovdqu(Operand dst, XMMRegister src);
|
||||
|
||||
#define AVX_SP_3(instr, opcode) \
|
||||
AVX_S_3(instr, opcode) \
|
||||
|
@ -1771,6 +1771,42 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, int8_t imm8) {
|
||||
}
|
||||
}
|
||||
|
||||
// Shift each quadword lane of dst left by imm8 bits, preferring the
// VEX-encoded form when AVX is available so SSE and AVX instructions are
// not intermixed.
void TurboAssembler::Psllq(XMMRegister dst, byte imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    psllq(dst, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpsllq(dst, dst, imm8);
}
|
||||
|
||||
// Shift each quadword lane of dst right (logical) by imm8 bits, preferring
// the VEX-encoded form when AVX is available so SSE and AVX instructions
// are not intermixed.
void TurboAssembler::Psrlq(XMMRegister dst, byte imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    psrlq(dst, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpsrlq(dst, dst, imm8);
}
|
||||
|
||||
// Shift each doubleword lane of dst left by imm8 bits, preferring the
// VEX-encoded form when AVX is available so SSE and AVX instructions are
// not intermixed.
void TurboAssembler::Pslld(XMMRegister dst, byte imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    pslld(dst, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpslld(dst, dst, imm8);
}
|
||||
|
||||
// Shift each doubleword lane of dst right (logical) by imm8 bits, preferring
// the VEX-encoded form when AVX is available so SSE and AVX instructions
// are not intermixed.
void TurboAssembler::Psrld(XMMRegister dst, byte imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    psrld(dst, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpsrld(dst, dst, imm8);
}
|
||||
|
||||
void TurboAssembler::Lzcntl(Register dst, Register src) {
|
||||
if (CpuFeatures::IsSupported(LZCNT)) {
|
||||
CpuFeatureScope scope(this, LZCNT);
|
||||
|
@ -80,7 +80,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
template <typename Dst, typename... Args>
|
||||
struct AvxHelper {
|
||||
Assembler* assm;
|
||||
// Call an method where the AVX version expects the dst argument to be
|
||||
// Call a method where the AVX version expects the dst argument to be
|
||||
// duplicated.
|
||||
template <void (Assembler::*avx)(Dst, Dst, Args...),
|
||||
void (Assembler::*no_avx)(Dst, Args...)>
|
||||
@ -93,7 +93,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
}
|
||||
}
|
||||
|
||||
// Call an method where the AVX version expects no duplicated dst argument.
|
||||
// Call a method where the AVX version expects no duplicated dst argument.
|
||||
template <void (Assembler::*avx)(Dst, Args...),
|
||||
void (Assembler::*no_avx)(Dst, Args...)>
|
||||
void emit(Dst dst, Args... args) {
|
||||
@ -127,11 +127,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
AVX_OP(Movmskpd, movmskpd)
|
||||
AVX_OP(Movss, movss)
|
||||
AVX_OP(Movsd, movsd)
|
||||
AVX_OP(Movdqu, movdqu)
|
||||
AVX_OP(Pcmpeqd, pcmpeqd)
|
||||
AVX_OP(Pslld, pslld)
|
||||
AVX_OP(Psllq, psllq)
|
||||
AVX_OP(Psrld, psrld)
|
||||
AVX_OP(Psrlq, psrlq)
|
||||
AVX_OP(Addss, addss)
|
||||
AVX_OP(Addsd, addsd)
|
||||
AVX_OP(Mulsd, mulsd)
|
||||
@ -370,6 +367,11 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
void Pinsrd(XMMRegister dst, Register src, int8_t imm8);
|
||||
void Pinsrd(XMMRegister dst, Operand src, int8_t imm8);
|
||||
|
||||
void Psllq(XMMRegister dst, byte imm8);
|
||||
void Psrlq(XMMRegister dst, byte imm8);
|
||||
void Pslld(XMMRegister dst, byte imm8);
|
||||
void Psrld(XMMRegister dst, byte imm8);
|
||||
|
||||
void CompareRoot(Register with, RootIndex index);
|
||||
void CompareRoot(Operand with, RootIndex index);
|
||||
|
||||
|
@ -1318,16 +1318,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
case kSSEFloat32Abs: {
|
||||
// TODO(bmeurer): Use RIP relative 128-bit constants.
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ psrlq(kScratchDoubleReg, 33);
|
||||
__ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
|
||||
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ Psrlq(kScratchDoubleReg, 33);
|
||||
__ Andps(i.OutputDoubleRegister(), kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kSSEFloat32Neg: {
|
||||
// TODO(bmeurer): Use RIP relative 128-bit constants.
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ psllq(kScratchDoubleReg, 31);
|
||||
__ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
|
||||
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ Psllq(kScratchDoubleReg, 31);
|
||||
__ Xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kSSEFloat32Sqrt:
|
||||
@ -1528,16 +1528,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kSSEFloat64Abs: {
|
||||
// TODO(bmeurer): Use RIP relative 128-bit constants.
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ psrlq(kScratchDoubleReg, 1);
|
||||
__ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
|
||||
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ Psrlq(kScratchDoubleReg, 1);
|
||||
__ Andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kSSEFloat64Neg: {
|
||||
// TODO(bmeurer): Use RIP relative 128-bit constants.
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ psllq(kScratchDoubleReg, 63);
|
||||
__ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
|
||||
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ Psllq(kScratchDoubleReg, 63);
|
||||
__ Xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kSSEFloat64Sqrt:
|
||||
@ -2021,11 +2021,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
case kX64Movss:
|
||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
|
||||
if (instr->HasOutput()) {
|
||||
__ movss(i.OutputDoubleRegister(), i.MemoryOperand());
|
||||
__ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
|
||||
} else {
|
||||
size_t index = 0;
|
||||
Operand operand = i.MemoryOperand(&index);
|
||||
__ movss(operand, i.InputDoubleRegister(index));
|
||||
__ Movss(operand, i.InputDoubleRegister(index));
|
||||
}
|
||||
break;
|
||||
case kX64Movsd: {
|
||||
@ -2054,11 +2054,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
CpuFeatureScope sse_scope(tasm(), SSSE3);
|
||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
|
||||
if (instr->HasOutput()) {
|
||||
__ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
__ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
} else {
|
||||
size_t index = 0;
|
||||
Operand operand = i.MemoryOperand(&index);
|
||||
__ movdqu(operand, i.InputSimd128Register(index));
|
||||
__ Movdqu(operand, i.InputSimd128Register(index));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -2080,7 +2080,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
if (instr->InputAt(0)->IsRegister()) {
|
||||
__ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
|
||||
} else {
|
||||
__ movss(i.OutputDoubleRegister(), i.InputOperand(0));
|
||||
__ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
|
||||
}
|
||||
break;
|
||||
case kX64BitcastLD:
|
||||
@ -4076,7 +4076,7 @@ void CodeGenerator::AssembleConstructFrame() {
|
||||
int slot_idx = 0;
|
||||
for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
|
||||
if (!((1 << i) & saves_fp)) continue;
|
||||
__ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
|
||||
__ Movdqu(Operand(rsp, kQuadWordSize * slot_idx),
|
||||
XMMRegister::from_code(i));
|
||||
slot_idx++;
|
||||
}
|
||||
@ -4118,7 +4118,7 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
|
||||
int slot_idx = 0;
|
||||
for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
|
||||
if (!((1 << i) & saves_fp)) continue;
|
||||
__ movdqu(XMMRegister::from_code(i),
|
||||
__ Movdqu(XMMRegister::from_code(i),
|
||||
Operand(rsp, kQuadWordSize * slot_idx));
|
||||
slot_idx++;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user