[wasm-simd][x64] Add AVX codegen

For a bunch of s8x16, s16x8 and s32x4 shuffle ops (generated by
s8x16shuffle).

Bug: v8:9561
Change-Id: I0e5cd8a90edba8bc15918c0ca1dc830475db2769
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2110952
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66865}
Parent: 8e8d61b38b
Commit: 2f83184db3
```diff
@@ -1523,6 +1523,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
     vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
     emit(imm8);
   }
+  void vpshufd(XMMRegister dst, Operand src, uint8_t imm8) {
+    vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
+    emit(imm8);
+  }
   void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
     vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
     emit(imm8);
```
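The hunk above adds a memory-operand overload of vpshufd next to the existing register form; both emit the same VEX-encoded 66 0F 70 opcode via vinstr. For readers unfamiliar with the instruction, here is a minimal scalar model of pshufd's semantics (illustrative only, not V8 code): each 2-bit field of the immediate selects a 32-bit source lane.

```cpp
#include <array>
#include <cstdint>

// Scalar model of pshufd/vpshufd: destination lane n receives the source
// lane named by bits [2n+1:2n] of the immediate.
std::array<uint32_t, 4> PshufdModel(const std::array<uint32_t, 4>& src,
                                    uint8_t imm8) {
  std::array<uint32_t, 4> dst;
  for (int lane = 0; lane < 4; ++lane) {
    dst[lane] = src[(imm8 >> (2 * lane)) & 0x3];
  }
  return dst;
}
```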
```diff
@@ -1531,6 +1535,14 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
     vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
     emit(imm8);
   }
+  void vpshufhw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
+    vinstr(0x70, dst, xmm0, src, kF3, k0F, kWIG);
+    emit(imm8);
+  }
+  void vpshufhw(XMMRegister dst, Operand src, uint8_t imm8) {
+    vinstr(0x70, dst, xmm0, src, kF3, k0F, kWIG);
+    emit(imm8);
+  }
 
   void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 uint8_t mask) {
```
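The new vpshufhw overloads mirror the vpshuflw pair one row up: pshuflw shuffles the four low 16-bit lanes and passes the high half through, while pshufhw does the opposite (hence the F3 mandatory prefix instead of F2). A scalar sketch of pshufhw (illustrative, not V8 code):

```cpp
#include <array>
#include <cstdint>

// Scalar model of pshufhw/vpshufhw: lanes 0-3 are copied unchanged; lanes
// 4-7 are permuted within the high half by 2-bit fields of the immediate.
std::array<uint16_t, 8> PshufhwModel(const std::array<uint16_t, 8>& src,
                                     uint8_t imm8) {
  std::array<uint16_t, 8> dst;
  for (int lane = 0; lane < 4; ++lane) {
    dst[lane] = src[lane];
    dst[4 + lane] = src[4 + ((imm8 >> (2 * lane)) & 0x3)];
  }
  return dst;
}
```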
```diff
@@ -216,10 +216,18 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_OP(Mulps, mulps)
   AVX_OP(Divps, divps)
   AVX_OP(Pshuflw, pshuflw)
+  AVX_OP(Pshufhw, pshufhw)
   AVX_OP(Packsswb, packsswb)
   AVX_OP(Packuswb, packuswb)
   AVX_OP(Packssdw, packssdw)
+  AVX_OP(Punpcklbw, punpcklbw)
+  AVX_OP(Punpcklwd, punpcklwd)
+  AVX_OP(Punpckldq, punpckldq)
+  AVX_OP(Punpckhbw, punpckhbw)
+  AVX_OP(Punpckhwd, punpckhwd)
+  AVX_OP(Punpckhdq, punpckhdq)
   AVX_OP(Punpcklqdq, punpcklqdq)
+  AVX_OP(Punpckhqdq, punpckhqdq)
   AVX_OP(Pshufd, pshufd)
   AVX_OP(Cmpps, cmpps)
   AVX_OP(Cmppd, cmppd)
```
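Each AVX_OP entry defines a capitalized macro-assembler helper (e.g. Pshufhw) that selects the VEX form when AVX is available and falls back to the SSE encoding otherwise. V8's actual macro machinery is more involved; hand-expanded, the dispatch it provides looks roughly like the sketch below (CpuFeatures and CpuFeatureScope are real V8 APIs, but this wrapper body is illustrative, not the literal expansion, and is not compilable outside TurboAssembler):

```cpp
// Hand-expanded sketch of what AVX_OP(Pshufhw, pshufhw) provides: one entry
// point that assembles vpshufhw under AVX and pshufhw otherwise.
void Pshufhw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);  // allow AVX instructions here
    vpshufhw(dst, src, imm8);
  } else {
    pshufhw(dst, src, imm8);
  }
}
```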
```diff
@@ -62,6 +62,10 @@ class InstructionOperandConverter {
     return static_cast<int8_t>(InputInt32(index));
   }
 
+  uint8_t InputUint8(size_t index) {
+    return bit_cast<uint8_t>(InputInt8(index));
+  }
+
   int16_t InputInt16(size_t index) {
     return static_cast<int16_t>(InputInt32(index));
   }
```
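InputUint8 goes through bit_cast rather than a value conversion so that an immediate such as 0x80, stored in the instruction stream as the int8_t -128, comes back as the byte 0x80 unchanged. A standalone equivalent of that reinterpretation (the helper name is hypothetical; memcpy is the portable stand-in for V8's bit_cast):

```cpp
#include <cstdint>
#include <cstring>

// Portable stand-in for bit_cast<uint8_t>(int8_t): reinterpret the bits
// rather than convert the value, so -128 maps to 0x80 and -1 to 0xFF.
uint8_t Uint8Bits(int8_t v) {
  uint8_t out;
  std::memcpy(&out, &v, sizeof(out));
  return out;
}
```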
```diff
@@ -580,9 +580,8 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
     ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);                        \
   } while (false)
 
-#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)                  \
+#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm)                            \
   do {                                                                    \
-    CpuFeatureScope sse_scope(tasm(), SSELevel);                          \
     DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));      \
     __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
   } while (false)
```
```diff
@@ -3747,129 +3746,126 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kX64S32x4Swizzle: {
       DCHECK_EQ(2, instr->InputCount());
-      ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
-                              i.InputInt8(1));
+      ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
+                              i.InputUint8(1));
       break;
     }
     case kX64S32x4Shuffle: {
-      CpuFeatureScope sse_scope(tasm(), SSE4_1);
       DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
-      int8_t shuffle = i.InputInt8(2);
+      uint8_t shuffle = i.InputUint8(2);
       DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
-      ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
-      ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
-      __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
+      ASSEMBLE_SIMD_IMM_INSTR(Pshufd, kScratchDoubleReg, 1, shuffle);
+      ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0, shuffle);
+      __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
       break;
     }
     case kX64S16x8Blend: {
-      ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
+      ASSEMBLE_SIMD_IMM_SHUFFLE(Pblendw, i.InputUint8(2));
       break;
     }
     case kX64S16x8HalfShuffle1: {
       XMMRegister dst = i.OutputSimd128Register();
-      ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
-      __ pshufhw(dst, dst, i.InputInt8(2));
+      ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(1));
+      __ Pshufhw(dst, dst, i.InputUint8(2));
       break;
     }
    case kX64S16x8HalfShuffle2: {
-      CpuFeatureScope sse_scope(tasm(), SSE4_1);
       XMMRegister dst = i.OutputSimd128Register();
-      ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
-      __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
-      ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
-      __ pshufhw(dst, dst, i.InputInt8(3));
-      __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
+      ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, kScratchDoubleReg, 1, i.InputUint8(2));
+      __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
+      ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(2));
+      __ Pshufhw(dst, dst, i.InputUint8(3));
+      __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
       break;
     }
     case kX64S8x16Alignr: {
-      ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
+      ASSEMBLE_SIMD_IMM_SHUFFLE(Palignr, i.InputUint8(2));
       break;
     }
     case kX64S16x8Dup: {
       XMMRegister dst = i.OutputSimd128Register();
-      int8_t lane = i.InputInt8(1) & 0x7;
-      int8_t lane4 = lane & 0x3;
-      int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
+      uint8_t lane = i.InputInt8(1) & 0x7;
+      uint8_t lane4 = lane & 0x3;
+      uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
       if (lane < 4) {
-        ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
-        __ pshufd(dst, dst, 0);
+        ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, half_dup);
+        __ Pshufd(dst, dst, static_cast<uint8_t>(0));
       } else {
-        ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
-        __ pshufd(dst, dst, 0xaa);
+        ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, half_dup);
+        __ Pshufd(dst, dst, static_cast<uint8_t>(0xaa));
       }
       break;
     }
     case kX64S8x16Dup: {
       XMMRegister dst = i.OutputSimd128Register();
-      int8_t lane = i.InputInt8(1) & 0xf;
+      uint8_t lane = i.InputInt8(1) & 0xf;
       DCHECK_EQ(dst, i.InputSimd128Register(0));
       if (lane < 8) {
-        __ punpcklbw(dst, dst);
+        __ Punpcklbw(dst, dst);
       } else {
-        __ punpckhbw(dst, dst);
+        __ Punpckhbw(dst, dst);
       }
       lane &= 0x7;
-      int8_t lane4 = lane & 0x3;
-      int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
+      uint8_t lane4 = lane & 0x3;
+      uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
       if (lane < 4) {
-        __ pshuflw(dst, dst, half_dup);
-        __ pshufd(dst, dst, 0);
+        __ Pshuflw(dst, dst, half_dup);
+        __ Pshufd(dst, dst, static_cast<uint8_t>(0));
       } else {
-        __ pshufhw(dst, dst, half_dup);
-        __ pshufd(dst, dst, 0xaa);
+        __ Pshufhw(dst, dst, half_dup);
+        __ Pshufd(dst, dst, static_cast<uint8_t>(0xaa));
       }
       break;
     }
     case kX64S64x2UnpackHigh:
-      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhqdq);
       break;
     case kX64S32x4UnpackHigh:
-      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhdq);
       break;
     case kX64S16x8UnpackHigh:
-      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhwd);
       break;
     case kX64S8x16UnpackHigh:
-      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhbw);
       break;
     case kX64S64x2UnpackLow:
-      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklqdq);
       break;
     case kX64S32x4UnpackLow:
-      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckldq);
       break;
     case kX64S16x8UnpackLow:
-      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklwd);
       break;
     case kX64S8x16UnpackLow:
-      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklbw);
       break;
     case kX64S16x8UnzipHigh: {
-      CpuFeatureScope sse_scope(tasm(), SSE4_1);
       XMMRegister dst = i.OutputSimd128Register();
       XMMRegister src2 = dst;
       DCHECK_EQ(dst, i.InputSimd128Register(0));
       if (instr->InputCount() == 2) {
-        ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
-        __ psrld(kScratchDoubleReg, 16);
+        ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
+        __ Psrld(kScratchDoubleReg, static_cast<byte>(16));
         src2 = kScratchDoubleReg;
       }
-      __ psrld(dst, 16);
-      __ packusdw(dst, src2);
+      __ Psrld(dst, static_cast<byte>(16));
+      __ Packusdw(dst, src2);
       break;
     }
     case kX64S16x8UnzipLow: {
-      CpuFeatureScope sse_scope(tasm(), SSE4_1);
       XMMRegister dst = i.OutputSimd128Register();
       XMMRegister src2 = dst;
       DCHECK_EQ(dst, i.InputSimd128Register(0));
-      __ pxor(kScratchDoubleReg, kScratchDoubleReg);
+      __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
       if (instr->InputCount() == 2) {
-        ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
+        ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1,
+                                static_cast<uint8_t>(0x55));
         src2 = kScratchDoubleReg;
       }
-      __ pblendw(dst, kScratchDoubleReg, 0xaa);
-      __ packusdw(dst, src2);
+      __ Pblendw(dst, kScratchDoubleReg, static_cast<uint8_t>(0xaa));
+      __ Packusdw(dst, src2);
       break;
     }
     case kX64S8x16UnzipHigh: {
```
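A recurring detail in the Dup cases above is the half_dup immediate, which replicates one 2-bit lane index into all four fields so that a single pshuflw/pshufhw broadcasts one 16-bit lane across a half-register. A standalone illustration (the helper name is hypothetical):

```cpp
#include <cstdint>

// Replicate a 2-bit lane index into all four fields of a shuffle immediate,
// as the kX64S16x8Dup / kX64S8x16Dup cases compute half_dup. For lane4 == 2
// this yields 0xAA, matching the Pshufd(dst, dst, 0xaa) broadcast above.
uint8_t HalfDupImmediate(uint8_t lane4) {
  lane4 &= 0x3;
  return static_cast<uint8_t>(lane4 | (lane4 << 2) | (lane4 << 4) |
                              (lane4 << 6));
}
```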
```diff
@@ -3877,12 +3873,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       XMMRegister src2 = dst;
       DCHECK_EQ(dst, i.InputSimd128Register(0));
       if (instr->InputCount() == 2) {
-        ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
-        __ psrlw(kScratchDoubleReg, 8);
+        ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
+        __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
         src2 = kScratchDoubleReg;
       }
-      __ psrlw(dst, 8);
-      __ packuswb(dst, src2);
+      __ Psrlw(dst, static_cast<byte>(8));
+      __ Packuswb(dst, src2);
       break;
     }
     case kX64S8x16UnzipLow: {
```
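The UnzipHigh cases rely on a shift-then-pack idiom: psrlw by 8 leaves only the odd-indexed byte of every 16-bit lane, and packuswb then narrows the two operands back to bytes (the saturation is a no-op, since every shifted value already fits in a byte). A scalar model of the net effect (illustrative only, not V8 code):

```cpp
#include <cstdint>

// Scalar model of kX64S8x16UnzipHigh with two inputs: the result is the
// odd-indexed bytes of a followed by the odd-indexed bytes of b.
void UnzipHighModel(uint8_t dst[16], const uint8_t a[16],
                    const uint8_t b[16]) {
  for (int j = 0; j < 8; ++j) {
    dst[j] = a[2 * j + 1];      // psrlw(a, 8) keeps the high byte per lane
    dst[8 + j] = b[2 * j + 1];  // packuswb appends b's lanes after a's
  }
}
```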
```diff
@@ -3890,44 +3886,44 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       XMMRegister src2 = dst;
       DCHECK_EQ(dst, i.InputSimd128Register(0));
       if (instr->InputCount() == 2) {
-        ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
-        __ psllw(kScratchDoubleReg, 8);
-        __ psrlw(kScratchDoubleReg, 8);
+        ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
+        __ Psllw(kScratchDoubleReg, static_cast<byte>(8));
+        __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
         src2 = kScratchDoubleReg;
       }
-      __ psllw(dst, 8);
-      __ psrlw(dst, 8);
-      __ packuswb(dst, src2);
+      __ Psllw(dst, static_cast<byte>(8));
+      __ Psrlw(dst, static_cast<byte>(8));
+      __ Packuswb(dst, src2);
       break;
     }
     case kX64S8x16TransposeLow: {
       XMMRegister dst = i.OutputSimd128Register();
       DCHECK_EQ(dst, i.InputSimd128Register(0));
-      __ psllw(dst, 8);
+      __ Psllw(dst, static_cast<byte>(8));
       if (instr->InputCount() == 1) {
-        __ movups(kScratchDoubleReg, dst);
+        __ Movups(kScratchDoubleReg, dst);
       } else {
         DCHECK_EQ(2, instr->InputCount());
-        ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
-        __ psllw(kScratchDoubleReg, 8);
+        ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
+        __ Psllw(kScratchDoubleReg, static_cast<byte>(8));
       }
-      __ psrlw(dst, 8);
-      __ por(dst, kScratchDoubleReg);
+      __ Psrlw(dst, static_cast<byte>(8));
+      __ Por(dst, kScratchDoubleReg);
       break;
     }
     case kX64S8x16TransposeHigh: {
       XMMRegister dst = i.OutputSimd128Register();
       DCHECK_EQ(dst, i.InputSimd128Register(0));
-      __ psrlw(dst, 8);
+      __ Psrlw(dst, static_cast<byte>(8));
       if (instr->InputCount() == 1) {
-        __ movups(kScratchDoubleReg, dst);
+        __ Movups(kScratchDoubleReg, dst);
       } else {
         DCHECK_EQ(2, instr->InputCount());
-        ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
-        __ psrlw(kScratchDoubleReg, 8);
+        ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
+        __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
       }
-      __ psllw(kScratchDoubleReg, 8);
-      __ por(dst, kScratchDoubleReg);
+      __ Psllw(kScratchDoubleReg, static_cast<byte>(8));
+      __ Por(dst, kScratchDoubleReg);
       break;
     }
     case kX64S8x8Reverse:
```
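TransposeLow interleaves the even-indexed bytes of the two inputs: shifting each input left by 8 within its 16-bit lanes isolates the even byte, and the psrlw/por pair then recombines them. A scalar model (illustrative only, not V8 code):

```cpp
#include <cstdint>

// Scalar model of kX64S8x16TransposeLow with two inputs: even bytes of a
// land in the even positions, even bytes of b in the adjacent odd positions.
void TransposeLowModel(uint8_t dst[16], const uint8_t a[16],
                       const uint8_t b[16]) {
  for (int j = 0; j < 8; ++j) {
    dst[2 * j] = a[2 * j];
    dst[2 * j + 1] = b[2 * j];
  }
}
```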
```diff
@@ -3938,14 +3934,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       DCHECK_EQ(dst, i.InputSimd128Register(0));
       if (arch_opcode != kX64S8x2Reverse) {
         // First shuffle words into position.
-        int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
-        __ pshuflw(dst, dst, shuffle_mask);
-        __ pshufhw(dst, dst, shuffle_mask);
+        uint8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
+        __ Pshuflw(dst, dst, shuffle_mask);
+        __ Pshufhw(dst, dst, shuffle_mask);
       }
-      __ movaps(kScratchDoubleReg, dst);
-      __ psrlw(kScratchDoubleReg, 8);
-      __ psllw(dst, 8);
-      __ por(dst, kScratchDoubleReg);
+      __ Movaps(kScratchDoubleReg, dst);
+      __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
+      __ Psllw(dst, static_cast<byte>(8));
+      __ Por(dst, kScratchDoubleReg);
       break;
     }
     case kX64S1x2AnyTrue:
```
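The Reverse cases end with the same four-instruction byte swap: copy the register, shift the copy right by 8 and the original left by 8 within 16-bit lanes, then OR, which exchanges each pair of adjacent bytes. For S8x4Reverse and S8x8Reverse, the preceding pshuflw/pshufhw first shuffle the words into position (mask 0xB1 swaps adjacent words; 0x1B reverses each half's words). Per 16-bit lane, the final swap is simply (illustrative model):

```cpp
#include <cstdint>

// Per-lane effect of the movaps/psrlw/psllw/por sequence above: swap the
// two bytes of every 16-bit lane.
uint16_t SwapBytesInLane(uint16_t lane) {
  return static_cast<uint16_t>((lane << 8) | (lane >> 8));
}
```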
```diff
@@ -1096,6 +1096,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
        AppendToBuffer("vmovdqu %s,", NameOfXMMRegister(regop));
        current += PrintRightXMMOperand(current);
        break;
+      case 0x70:
+        AppendToBuffer("vpshufhw %s,", NameOfXMMRegister(regop));
+        current += PrintRightXMMOperand(current);
+        AppendToBuffer(",0x%x", *current++);
+        break;
       case 0x7F:
         AppendToBuffer("vmovdqu ");
         current += PrintRightXMMOperand(current);
```
```diff
@@ -783,8 +783,11 @@ TEST(DisasmX64) {
     __ vpinsrq(xmm1, xmm2, rax, 9);
     __ vpinsrq(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 9);
     __ vpshufd(xmm1, xmm2, 85);
+    __ vpshufd(xmm1, Operand(rbx, rcx, times_4, 10000), 85);
     __ vpshuflw(xmm1, xmm2, 85);
     __ vpshuflw(xmm1, Operand(rbx, rcx, times_4, 10000), 85);
+    __ vpshufhw(xmm1, xmm2, 85);
+    __ vpshufhw(xmm1, Operand(rbx, rcx, times_4, 10000), 85);
     __ vshufps(xmm3, xmm2, xmm3, 3);
     __ vpblendw(xmm1, xmm2, xmm3, 23);
     __ vpblendw(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 23);
```