[wasm-simd] AVX codegen for load splat
Bug: v8:9886 Change-Id: I321e93d02971c6ba568d9d7c52d464ffc2754665 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1929837 Commit-Queue: Zhi An Ng <zhin@chromium.org> Reviewed-by: Bill Budge <bbudge@chromium.org> Cr-Commit-Position: refs/heads/master@{#65277}
This commit is contained in:
parent
2fb290d79a
commit
83fc8559fa
@ -3517,6 +3517,23 @@ void Assembler::movmskps(Register dst, XMMRegister src) {
|
||||
}
|
||||
|
||||
// AVX instructions
|
||||
|
||||
// Emits the AVX (VEX-encoded) form of movddup with a memory/operand source.
// The byte sequence is: a VEX prefix (128-bit length, F2 prefix, 0F opcode
// map, W ignored), the opcode byte 0x12, then the operand encoding for
// dst/src. Debug builds assert that AVX has been enabled on this assembler.
void Assembler::vmovddup(XMMRegister dst, Operand src) {
|
||||
DCHECK(IsEnabled(AVX));
|
||||
EnsureSpace ensure_space(this);
|
||||
// NOTE(review): xmm0 fills the second-register slot of the VEX prefix;
// presumably a placeholder because there is no second source register --
// confirm against emit_vex_prefix.
emit_vex_prefix(dst, xmm0, src, kL128, kF2, k0F, kWIG);
|
||||
emit(0x12);
|
||||
emit_sse_operand(dst, src);
|
||||
}
|
||||
|
||||
// Emits vbroadcastss with an XMM destination and a memory/operand source.
// The byte sequence is: a VEX prefix (128-bit length, 66 prefix, 0F38 opcode
// map, W0), the opcode byte 0x18, then the operand encoding for dst/src.
// Debug builds assert that AVX has been enabled on this assembler.
void Assembler::vbroadcastss(XMMRegister dst, Operand src) {
|
||||
DCHECK(IsEnabled(AVX));
|
||||
EnsureSpace ensure_space(this);
|
||||
// NOTE(review): xmm0 fills the second-register slot of the VEX prefix;
// presumably a placeholder because there is no second source register --
// confirm against emit_vex_prefix.
emit_vex_prefix(dst, xmm0, src, kL128, k66, k0F38, kW0);
|
||||
emit(0x18);
|
||||
emit_sse_operand(dst, src);
|
||||
}
|
||||
|
||||
void Assembler::vfmasd(byte op, XMMRegister dst, XMMRegister src1,
|
||||
XMMRegister src2) {
|
||||
DCHECK(IsEnabled(FMA3));
|
||||
|
@ -1116,6 +1116,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
void movlhps(XMMRegister dst, XMMRegister src);
|
||||
|
||||
// AVX instruction
|
||||
void vmovddup(XMMRegister dst, Operand src);
|
||||
void vbroadcastss(XMMRegister dst, Operand src);
|
||||
// Emits vfmadd132sd by forwarding to vfmasd with opcode byte 0x99.
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0x99, dst, src1, src2);
|
||||
}
|
||||
@ -1628,6 +1630,14 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
|
||||
emit(imm8);
|
||||
}
|
||||
// Emits vpshuflw (register source): opcode 0x70 via vinstr with the F2 prefix
// and 0F opcode map, followed by imm8 as a trailing immediate byte.
void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
|
||||
// The immediate byte must follow the instruction bytes emitted by vinstr.
emit(imm8);
|
||||
}
|
||||
// Emits vpshuflw (memory/operand source): opcode 0x70 via vinstr with the F2
// prefix and 0F opcode map, followed by imm8 as a trailing immediate byte.
void vpshuflw(XMMRegister dst, Operand src, uint8_t imm8) {
|
||||
vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
|
||||
// The immediate byte must follow the instruction bytes emitted by vinstr.
emit(imm8);
|
||||
}
|
||||
|
||||
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
|
||||
|
@ -116,6 +116,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
|
||||
}
|
||||
|
||||
// Defines a variadic template method `macro_name(dst, args...)` that builds an
// AvxHelper carrying SSE3 as its optional CpuFeature and calls its emit<>
// with both &Assembler::v##name and &Assembler::name.
// NOTE(review): presumably AvxHelper picks the v-prefixed AVX form when AVX is
// available and otherwise the SSE3 form -- confirm against AvxHelper::emit.
#define AVX_OP_SSE3(macro_name, name) \
|
||||
template <typename Dst, typename... Args> \
|
||||
void macro_name(Dst dst, Args... args) { \
|
||||
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE3)} \
|
||||
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
|
||||
}
|
||||
|
||||
#define AVX_OP_SSSE3(macro_name, name) \
|
||||
template <typename Dst, typename... Args> \
|
||||
void macro_name(Dst dst, Args... args) { \
|
||||
@ -195,6 +202,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
AVX_OP(Subps, subps)
|
||||
AVX_OP(Mulps, mulps)
|
||||
AVX_OP(Divps, divps)
|
||||
AVX_OP(Pshuflw, pshuflw)
|
||||
AVX_OP(Punpcklqdq, punpcklqdq)
|
||||
AVX_OP_SSE3(Movddup, movddup)
|
||||
AVX_OP_SSSE3(Pshufb, pshufb)
|
||||
AVX_OP_SSSE3(Psignd, psignd)
|
||||
AVX_OP_SSE4_1(Pmulld, pmulld)
|
||||
|
@ -3660,31 +3660,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kX64S8x16LoadSplat: {
|
||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||
__ pinsrb(i.OutputSimd128Register(), i.MemoryOperand(), 0);
|
||||
__ pxor(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ pshufb(i.OutputSimd128Register(), kScratchDoubleReg);
|
||||
__ Pinsrb(i.OutputSimd128Register(), i.MemoryOperand(), 0);
|
||||
__ Pxor(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ Pshufb(i.OutputSimd128Register(), kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kX64S16x8LoadSplat: {
|
||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||
__ pinsrw(i.OutputSimd128Register(), i.MemoryOperand(), 0);
|
||||
__ pshuflw(i.OutputSimd128Register(), i.OutputSimd128Register(), 0);
|
||||
__ punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
|
||||
__ Pinsrw(i.OutputSimd128Register(), i.MemoryOperand(), 0);
|
||||
__ Pshuflw(i.OutputSimd128Register(), i.OutputSimd128Register(),
|
||||
static_cast<uint8_t>(0));
|
||||
__ Punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
|
||||
break;
|
||||
}
|
||||
case kX64S32x4LoadSplat: {
|
||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||
// TODO(v8:9886): AVX codegen
|
||||
__ movss(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
__ shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
|
||||
static_cast<byte>(0));
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vbroadcastss(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
} else {
|
||||
__ Movss(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
__ Shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
|
||||
static_cast<byte>(0));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kX64S64x2LoadSplat: {
|
||||
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
|
||||
// TODO(v8:9886): AVX codegen
|
||||
__ movsd(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
__ punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
|
||||
__ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
|
||||
break;
|
||||
}
|
||||
case kX64I16x8Load8x8S: {
|
||||
|
@ -819,6 +819,10 @@ int DisassemblerX64::AVXInstruction(byte* data) {
|
||||
int mod, regop, rm, vvvv = vex_vreg();
|
||||
get_modrm(*current, &mod, &regop, &rm);
|
||||
switch (opcode) {
|
||||
case 0x18:
|
||||
AppendToBuffer("vbroadcastss %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
break;
|
||||
case 0x99:
|
||||
AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(),
|
||||
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
|
||||
@ -1067,6 +1071,10 @@ int DisassemblerX64::AVXInstruction(byte* data) {
|
||||
}
|
||||
AppendToBuffer(",%s", NameOfXMMRegister(regop));
|
||||
break;
|
||||
case 0x12:
|
||||
AppendToBuffer("vmovddup %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
break;
|
||||
case 0x2A:
|
||||
AppendToBuffer("%s %s,%s,", vex_w() ? "vcvtqsi2sd" : "vcvtlsi2sd",
|
||||
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
|
||||
@ -1126,6 +1134,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
|
||||
AppendToBuffer("vlddqu %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
break;
|
||||
case 0x70:
|
||||
AppendToBuffer("vpshuflw %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
AppendToBuffer(",0x%x", *current++);
|
||||
break;
|
||||
case 0x7C:
|
||||
AppendToBuffer("vhaddps %s,%s,", NameOfXMMRegister(regop),
|
||||
NameOfXMMRegister(vvvv));
|
||||
|
@ -763,7 +763,12 @@ TEST(DisasmX64) {
|
||||
__ vpinsrd(xmm1, xmm2, rax, 2);
|
||||
__ vpinsrd(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 2);
|
||||
__ vpshufd(xmm1, xmm2, 85);
|
||||
__ vpshuflw(xmm1, xmm2, 85);
|
||||
__ vpshuflw(xmm1, Operand(rbx, rcx, times_4, 10000), 85);
|
||||
__ vshufps(xmm3, xmm2, xmm3, 3);
|
||||
|
||||
__ vmovddup(xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vbroadcastss(xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user