[wasm-simd] Add AVX for movlhps and some avx codegen
Bug: v8:9561
Change-Id: I18c832737cbea89e08af2ca166de7b01b7fe51b0
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1986256
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65674}
This commit is contained in:
parent 6bef631d75
commit acc96e1f6a
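For context on the instruction being wired up: movlhps dst,src copies the low 64 bits of src into the high 64 bits of dst and leaves the low half of dst untouched; the AVX form vmovlhps dst,src1,src2 is non-destructive, taking the low half of src1 for the low lane and the low half of src2 for the high lane. A minimal sketch (hypothetical caller, not part of this CL) of emitting both forms through V8's x64 assembler:

  // Sketch only, assuming an already-constructed Assembler `masm`.
  // SSE form (two-operand, destructive):
  //   xmm0[127:64] = xmm1[63:0]; xmm0[63:0] unchanged.
  masm.movlhps(xmm0, xmm1);
  // AVX form (three-operand, non-destructive):
  //   xmm0[63:0] = xmm1[63:0]; xmm0[127:64] = xmm2[63:0].
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(&masm, AVX);
    masm.vmovlhps(xmm0, xmm1, xmm2);
  }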
src/codegen/x64/assembler-x64.cc
@@ -3919,14 +3919,6 @@ void Assembler::movups(Operand dst, XMMRegister src) {
   emit_sse_operand(src, dst);
 }
 
-void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
-  EnsureSpace ensure_space(this);
-  emit_optional_rex_32(dst, src);
-  emit(0x0F);
-  emit(0x16);
-  emit_sse_operand(dst, src);
-}
-
 void Assembler::sse_instr(XMMRegister dst, XMMRegister src, byte escape,
                           byte opcode) {
   EnsureSpace ensure_space(this);
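The deleted body hand-emits exactly what the shared sse_instr helper produces: an optional REX prefix, the 0x0F escape byte, the 0x16 opcode, and a ModR/M byte. For example, movlhps xmm2,xmm1 assembles to the same three bytes either way (no REX is needed for registers below xmm8):

  0F 16 D1    movlhps xmm2,xmm1
  |  |  +---- ModR/M: mod=11 (register direct), reg=xmm2, r/m=xmm1
  |  +------- opcode 0x16
  +---------- 0x0F escape byte

That equivalence is why the next hunk can fold movlhps into the header as a one-line call to sse_instr.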
src/codegen/x64/assembler-x64.h
@@ -1133,7 +1133,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
   void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
 
-  void movlhps(XMMRegister dst, XMMRegister src);
+  void movlhps(XMMRegister dst, XMMRegister src) {
+    sse_instr(dst, src, 0x0F, 0x16);
+  }
 
   // AVX instruction
   void vmovddup(XMMRegister dst, XMMRegister src);
@@ -1228,6 +1230,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   SSE2_INSTRUCTION_LIST_SHIFT_IMM(AVX_SSE2_SHIFT_IMM)
 #undef AVX_SSE2_SHIFT_IMM
 
+  void vmovlhps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
+    vinstr(0x16, dst, src1, src2, kNone, k0F, kWIG);
+  }
   void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
     vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
   }
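The vinstr arguments spell out the VEX encoding: kNone means no implied SIMD prefix (pp=00), k0F selects the 0F opcode map, and kWIG means the VEX.W bit is ignored. Assuming the assembler chooses the compact two-byte VEX form (as it can when no extended registers force the three-byte form), vmovlhps xmm1,xmm3,xmm5 would encode as:

  C5 E0 16 CD    vmovlhps xmm1,xmm3,xmm5
  C5   two-byte VEX prefix (the 0F opcode map is implied)
  E0   R=1 (inverted), vvvv=1100 (inverted 3, i.e. xmm3), L=0 (128-bit), pp=00
  16   opcode
  CD   ModR/M: mod=11, reg=xmm1, r/m=xmm5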
src/codegen/x64/macro-assembler-x64.h
@@ -186,6 +186,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_OP(Por, por)
   AVX_OP(Pxor, pxor)
   AVX_OP(Psubd, psubd)
+  AVX_OP(Psubq, psubq)
   AVX_OP(Pslld, pslld)
   AVX_OP(Pavgb, pavgb)
   AVX_OP(Pavgw, pavgw)
@@ -215,6 +216,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_OP(Pshufd, pshufd)
   AVX_OP(Cmpps, cmpps)
   AVX_OP(Cmppd, cmppd)
+  AVX_OP(Movlhps, movlhps)
   AVX_OP_SSE3(Movddup, movddup)
   AVX_OP_SSSE3(Pshufb, pshufb)
   AVX_OP_SSSE3(Psignd, psignd)
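AVX_OP generates a capitalized TurboAssembler wrapper that dispatches on CPU support: with AVX available it emits the non-destructive three-operand v-form, reusing dst as the first source; otherwise it falls back to the two-operand SSE instruction. A paraphrased sketch of what AVX_OP(Movlhps, movlhps) expands to (the real macro is more generic than this):

  void Movlhps(XMMRegister dst, XMMRegister src) {
    if (CpuFeatures::IsSupported(AVX)) {
      CpuFeatureScope avx_scope(this, AVX);
      vmovlhps(dst, dst, src);  // AVX: dst doubles as the first source
    } else {
      movlhps(dst, src);        // SSE fallback, destructive on dst
    }
  }

This is what lets the code-generator hunks below call the capitalized Movq/Pextrq/Movlhps/Pxor/Psubq forms and get AVX codegen for free on supporting hardware.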
src/compiler/backend/x64/code-generator-x64.cc
@@ -2274,7 +2274,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kX64F64x2ReplaceLane: {
       CpuFeatureScope sse_scope(tasm(), SSE4_1);
       if (instr->InputAt(2)->IsFPRegister()) {
-        __ movq(kScratchRegister, i.InputDoubleRegister(2));
+        __ Movq(kScratchRegister, i.InputDoubleRegister(2));
         __ Pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputInt8(1));
       } else {
         __ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
@@ -2304,15 +2304,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       DCHECK_EQ(dst, i.InputSimd128Register(0));
 
       // Extract high quadword.
-      __ pextrq(tmp, dst, 1);
+      __ Pextrq(tmp, dst, static_cast<int8_t>(1));
       // We cannot convert directly into dst, as the next call to Cvtqui2sd will
       // zero it out, so be careful to make sure dst is unique to tmp_xmm.
       __ Cvtqui2sd(tmp_xmm, tmp);
       // Extract low quadword and convert.
-      __ movq(tmp, dst);
+      __ Movq(tmp, dst);
       __ Cvtqui2sd(dst, tmp);
       // Move converted high quadword to top of dst.
-      __ movlhps(dst, tmp_xmm);
+      __ Movlhps(dst, tmp_xmm);
       break;
     }
     case kX64F64x2ExtractLane: {
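This sequence exists because x64 has no packed unsigned-int64-to-float64 conversion: each 64-bit lane is pulled into a general-purpose register, converted on its own, and Movlhps stitches the two scalar results back into one vector. A scalar model of the lane math (a sketch with hypothetical names, not code from this CL):

  #include <cstdint>

  struct F64x2 { double lo, hi; };

  // Mirrors the emitted code: Pextrq pulls out the high lane, Movq the low
  // lane, Cvtqui2sd converts each, and Movlhps writes tmp_xmm's low 64 bits
  // into the high half of dst.
  F64x2 ConvertU64x2(uint64_t lane0, uint64_t lane1) {
    F64x2 r;
    r.hi = static_cast<double>(lane1);  // Cvtqui2sd(tmp_xmm, tmp)
    r.lo = static_cast<double>(lane0);  // Cvtqui2sd(dst, tmp)
    return r;                           // Movlhps(dst, tmp_xmm)
  }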
@@ -2650,7 +2650,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
         __ Pinsrq(i.OutputSimd128Register(), i.InputRegister(2),
                   i.InputInt8(1));
       } else {
-        __ pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+        __ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
       }
       break;
     }
@@ -2658,11 +2658,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       XMMRegister dst = i.OutputSimd128Register();
       XMMRegister src = i.InputSimd128Register(0);
       if (dst == src) {
-        __ movapd(kScratchDoubleReg, src);
+        __ Movapd(kScratchDoubleReg, src);
         src = kScratchDoubleReg;
       }
-      __ pxor(dst, dst);
-      __ psubq(dst, src);
+      __ Pxor(dst, dst);
+      __ Psubq(dst, src);
       break;
     }
     case kX64I64x2Shl: {
src/diagnostics/x64/disasm-x64.cc
@@ -1297,6 +1297,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
      current += PrintRightXMMOperand(current);
      AppendToBuffer(",%s", NameOfXMMRegister(regop));
      break;
+    case 0x16:
+      AppendToBuffer("vmovlhps %s,%s,", NameOfXMMRegister(regop),
+                     NameOfXMMRegister(vvvv));
+      current += PrintRightXMMOperand(current);
+      break;
     case 0x28:
       AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop));
       current += PrintRightXMMOperand(current);
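In AVXInstruction, regop is the ModR/M reg field (the destination) and vvvv is the extra register carried in the VEX prefix (the first source), so the new case prints the operands destination-first like the neighboring entries. Fed the bytes from the encoding sketch above, it should print:

  c5 e0 16 cd    vmovlhps xmm1,xmm3,xmm5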
test/cctest/test-disasm-x64.cc
@@ -663,6 +663,7 @@ TEST(DisasmX64) {
     __ vmovups(xmm5, xmm1);
     __ vmovups(xmm5, Operand(rdx, 4));
     __ vmovups(Operand(rdx, 4), xmm5);
+    __ vmovlhps(xmm1, xmm3, xmm5);
 
     __ vandps(xmm0, xmm9, xmm2);
     __ vandps(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));
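The DisasmX64 test assembles a long sequence of instructions and then runs the disassembler over the generated buffer, so this single added line exercises both the new vmovlhps emitter and the new 0x16 case in disasm-x64.cc; if either were wrong, the disassembly pass would derail at these bytes rather than produce the vmovlhps xmm1,xmm3,xmm5 line shown above.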