[wasm-simd] Add AVX codegen for some instructions
Bug: v8:9561
Change-Id: Ic17e0861cee5b1613370f6d2a1801a0e1744a3f5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1868823
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64390}
This commit is contained in:
parent
afbbfcbe1c
commit
a972a8df85
@ -4254,6 +4254,16 @@ void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2) {
|
||||
emit_sse_operand(dst, src2);
|
||||
}
|
||||
|
||||
// Emits a VEX-prefixed instruction with two XMM register sources and a
// trailing 8-bit immediate.
//
// Byte order written to the buffer: VEX prefix, `op`, the register operand
// encoding for (dst, src2), then `imm8`.
//
//   op    opcode byte emitted right after the VEX prefix.
//   dst   destination register.
//   src1  first source; only passed to emit_vex_prefix.
//   src2  second source; passed to emit_vex_prefix and emit_sse_operand.
//   imm8  immediate emitted as the last byte of the instruction.
void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
|
||||
XMMRegister src2, byte imm8) {
|
||||
DCHECK(IsEnabled(AVX));  // Callers must have AVX enabled.
|
||||
EnsureSpace ensure_space(this);
|
||||
// NOTE(review): kL128/kNone/k0F/kWIG presumably select 128-bit vector length,
// no SIMD prefix, the 0F opcode map and an ignored W bit -- confirm against
// emit_vex_prefix.
emit_vex_prefix(dst, src1, src2, kL128, kNone, k0F, kWIG);
|
||||
emit(op);
|
||||
emit_sse_operand(dst, src2);
|
||||
emit(imm8);  // The immediate follows the operand bytes.
|
||||
}
|
||||
|
||||
void Assembler::vpd(byte op, XMMRegister dst, XMMRegister src1,
|
||||
XMMRegister src2) {
|
||||
DCHECK(IsEnabled(AVX));
|
||||
|
@ -1504,6 +1504,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
|
||||
|
||||
// AVX form of shufps: forwards to the imm8 overload of vps with opcode byte
// 0xC6, passing dst, src1, src2 and imm8 through unchanged.
void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
|
||||
vps(0xC6, dst, src1, src2, imm8);
|
||||
}
|
||||
|
||||
// Register-to-register form: forwards to vps with opcode byte 0x28.
// NOTE(review): xmm0 in the src1 slot presumably stands in for an unused
// first-source field -- confirm against emit_vex_prefix.
void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
|
||||
// Register-to-register form: forwards to vps with opcode byte 0x10, passing
// xmm0 in the src1 slot.
void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
|
||||
// Operand-source form: forwards to the Operand overload of vps with opcode
// byte 0x10, passing xmm0 in the src1 slot.
void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
|
||||
@ -1647,6 +1651,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
|
||||
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
|
||||
// Register-register overload that additionally takes an 8-bit immediate
// (imm8), for instructions such as vshufps that carry one.
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
|
||||
byte imm8);
|
||||
void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
|
||||
|
||||
|
@ -152,6 +152,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
AVX_OP(Roundsd, roundsd)
|
||||
AVX_OP(Sqrtss, sqrtss)
|
||||
AVX_OP(Sqrtsd, sqrtsd)
|
||||
AVX_OP(Sqrtps, sqrtps)
|
||||
AVX_OP(Sqrtpd, sqrtpd)
|
||||
AVX_OP(Ucomiss, ucomiss)
|
||||
AVX_OP(Ucomisd, ucomisd)
|
||||
@ -172,6 +173,11 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
AVX_OP(Pmaxsd, pmaxsd)
|
||||
AVX_OP(Pmaxud, pmaxud)
|
||||
AVX_OP(Pcmpgtd, pcmpgtd)
|
||||
AVX_OP(Addpd, addpd)
|
||||
AVX_OP(Subpd, subpd)
|
||||
AVX_OP(Mulpd, mulpd)
|
||||
AVX_OP(Divpd, divpd)
|
||||
AVX_OP(Shufps, shufps)
|
||||
|
||||
#undef AVX_OP
|
||||
|
||||
|
@ -2331,19 +2331,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
}
|
||||
case kX64F64x2Add: {
|
||||
ASSEMBLE_SSE_BINOP(addpd);
|
||||
ASSEMBLE_SSE_BINOP(Addpd);
|
||||
break;
|
||||
}
|
||||
case kX64F64x2Sub: {
|
||||
ASSEMBLE_SSE_BINOP(subpd);
|
||||
ASSEMBLE_SSE_BINOP(Subpd);
|
||||
break;
|
||||
}
|
||||
case kX64F64x2Mul: {
|
||||
ASSEMBLE_SSE_BINOP(mulpd);
|
||||
ASSEMBLE_SSE_BINOP(Mulpd);
|
||||
break;
|
||||
}
|
||||
case kX64F64x2Div: {
|
||||
ASSEMBLE_SSE_BINOP(divpd);
|
||||
ASSEMBLE_SSE_BINOP(Divpd);
|
||||
break;
|
||||
}
|
||||
case kX64F64x2Min: {
|
||||
@ -2435,11 +2435,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
case kX64F32x4Splat: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
if (instr->InputAt(0)->IsFPRegister()) {
|
||||
__ movss(dst, i.InputDoubleRegister(0));
|
||||
__ Movss(dst, i.InputDoubleRegister(0));
|
||||
} else {
|
||||
__ movss(dst, i.InputOperand(0));
|
||||
__ Movss(dst, i.InputOperand(0));
|
||||
}
|
||||
__ shufps(dst, dst, 0x0);
|
||||
__ Shufps(dst, dst, static_cast<byte>(0x0));
|
||||
break;
|
||||
}
|
||||
case kX64F32x4ExtractLane: {
|
||||
@ -2484,13 +2484,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
if (dst == src) {
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ psrld(kScratchDoubleReg, 1);
|
||||
__ andps(i.OutputSimd128Register(), kScratchDoubleReg);
|
||||
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ Psrld(kScratchDoubleReg, static_cast<byte>(1));
|
||||
__ Andps(i.OutputSimd128Register(), kScratchDoubleReg);
|
||||
} else {
|
||||
__ pcmpeqd(dst, dst);
|
||||
__ psrld(dst, 1);
|
||||
__ andps(dst, i.InputSimd128Register(0));
|
||||
__ Pcmpeqd(dst, dst);
|
||||
__ Psrld(dst, static_cast<byte>(1));
|
||||
__ Andps(dst, i.InputSimd128Register(0));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -2498,13 +2498,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
if (dst == src) {
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ pslld(kScratchDoubleReg, 31);
|
||||
__ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
|
||||
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ Pslld(kScratchDoubleReg, static_cast<byte>(31));
|
||||
__ Xorps(i.OutputSimd128Register(), kScratchDoubleReg);
|
||||
} else {
|
||||
__ pcmpeqd(dst, dst);
|
||||
__ pslld(dst, 31);
|
||||
__ xorps(dst, i.InputSimd128Register(0));
|
||||
__ Pcmpeqd(dst, dst);
|
||||
__ Pslld(dst, static_cast<byte>(31));
|
||||
__ Xorps(dst, i.InputSimd128Register(0));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user