[wasm-simd] Add AVX for movlhps and some avx codegen

Bug: v8:9561
Change-Id: I18c832737cbea89e08af2ca166de7b01b7fe51b0
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1986256
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65674}
This commit is contained in:
Ng Zhi An 2020-01-08 11:45:32 -08:00 committed by Commit Bot
parent 6bef631d75
commit acc96e1f6a
6 changed files with 22 additions and 17 deletions

View File

@ -3919,14 +3919,6 @@ void Assembler::movups(Operand dst, XMMRegister src) {
emit_sse_operand(src, dst);
}
// Emits MOVLHPS dst, src by writing its encoding directly: an optional REX
// prefix, the two bytes 0x0F 0x16, and then the register-register operand.
// Keep the statements in this order; the calls are issued sequentially and
// presumably each appends to the instruction stream (TODO confirm in emit()).
void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
// NOTE(review): presumably reserves buffer room for the bytes emitted below —
// confirm against the EnsureSpace definition.
EnsureSpace ensure_space(this);
// REX prefix; going by the helper's name it is only emitted when dst/src
// require one.
emit_optional_rex_32(dst, src);
emit(0x0F);  // Escape byte.
emit(0x16);  // Opcode byte.
emit_sse_operand(dst, src);  // Operand encoding for the dst/src pair.
}
void Assembler::sse_instr(XMMRegister dst, XMMRegister src, byte escape,
byte opcode) {
EnsureSpace ensure_space(this);

View File

@ -1133,7 +1133,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
void movlhps(XMMRegister dst, XMMRegister src);
// MOVLHPS dst, src, emitted through the generic SSE helper using the
// two-byte encoding 0F 16.
void movlhps(XMMRegister dst, XMMRegister src) {
  constexpr uint8_t kEscapeByte = 0x0F;
  constexpr uint8_t kMovlhpsOpcode = 0x16;
  sse_instr(dst, src, kEscapeByte, kMovlhpsOpcode);
}
// AVX instruction
void vmovddup(XMMRegister dst, XMMRegister src);
@ -1228,6 +1230,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
SSE2_INSTRUCTION_LIST_SHIFT_IMM(AVX_SSE2_SHIFT_IMM)
#undef AVX_SSE2_SHIFT_IMM
// AVX three-operand form of movlhps: opcode 0x16 with no SIMD prefix, the 0F
// leading-opcode map, and VEX.W ignored.
void vmovlhps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  constexpr uint8_t kVmovlhpsOpcode = 0x16;
  vinstr(kVmovlhpsOpcode, dst, src1, src2, kNone, k0F, kWIG);
}
// AVX vcvtss2sd: opcode 0x5a with the F3 SIMD prefix, the 0F leading-opcode
// map, and VEX.W ignored.
void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  constexpr uint8_t kVcvtss2sdOpcode = 0x5a;
  vinstr(kVcvtss2sdOpcode, dst, src1, src2, kF3, k0F, kWIG);
}

View File

@ -186,6 +186,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Por, por)
AVX_OP(Pxor, pxor)
AVX_OP(Psubd, psubd)
AVX_OP(Psubq, psubq)
AVX_OP(Pslld, pslld)
AVX_OP(Pavgb, pavgb)
AVX_OP(Pavgw, pavgw)
@ -215,6 +216,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Pshufd, pshufd)
AVX_OP(Cmpps, cmpps)
AVX_OP(Cmppd, cmppd)
AVX_OP(Movlhps, movlhps)
AVX_OP_SSE3(Movddup, movddup)
AVX_OP_SSSE3(Pshufb, pshufb)
AVX_OP_SSSE3(Psignd, psignd)

View File

@ -2274,7 +2274,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64F64x2ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (instr->InputAt(2)->IsFPRegister()) {
__ movq(kScratchRegister, i.InputDoubleRegister(2));
__ Movq(kScratchRegister, i.InputDoubleRegister(2));
__ Pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputInt8(1));
} else {
__ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
@ -2304,15 +2304,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(dst, i.InputSimd128Register(0));
// Extract high quadword.
__ pextrq(tmp, dst, 1);
__ Pextrq(tmp, dst, static_cast<int8_t>(1));
// We cannot convert directly into dst, as the next call to Cvtqui2sd will
// zero it out, so be careful to make sure dst is unique to tmp_xmm.
__ Cvtqui2sd(tmp_xmm, tmp);
// Extract low quadword and convert.
__ movq(tmp, dst);
__ Movq(tmp, dst);
__ Cvtqui2sd(dst, tmp);
// Move converted high quadword to top of dst.
__ movlhps(dst, tmp_xmm);
__ Movlhps(dst, tmp_xmm);
break;
}
case kX64F64x2ExtractLane: {
@ -2650,7 +2650,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pinsrq(i.OutputSimd128Register(), i.InputRegister(2),
i.InputInt8(1));
} else {
__ pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
__ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
}
break;
}
@ -2658,11 +2658,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
__ movapd(kScratchDoubleReg, src);
__ Movapd(kScratchDoubleReg, src);
src = kScratchDoubleReg;
}
__ pxor(dst, dst);
__ psubq(dst, src);
__ Pxor(dst, dst);
__ Psubq(dst, src);
break;
}
case kX64I64x2Shl: {

View File

@ -1297,6 +1297,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
break;
case 0x16:
AppendToBuffer("vmovlhps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x28:
AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);

View File

@ -663,6 +663,7 @@ TEST(DisasmX64) {
__ vmovups(xmm5, xmm1);
__ vmovups(xmm5, Operand(rdx, 4));
__ vmovups(Operand(rdx, 4), xmm5);
__ vmovlhps(xmm1, xmm3, xmm5);
__ vandps(xmm0, xmm9, xmm2);
__ vandps(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));