[wasm-simd] Add AVX codegen for some integer SIMD operations

Bug: v8:9854
Change-Id: I8450e6456321f3be2badeccac61414e005df2c65
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1860327
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64281}
This commit is contained in:
Deepti Gandluri 2019-10-14 15:28:57 -07:00 committed by Commit Bot
parent 031193b4c5
commit 12d84b69dd
3 changed files with 123 additions and 28 deletions

View File

@ -1742,7 +1742,11 @@ void TurboAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) {
Movd(dst, src);
return;
}
if (CpuFeatures::IsSupported(SSE4_1)) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpextrd(dst, src, imm8);
return;
} else if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
pextrd(dst, src, imm8);
return;
@ -1752,8 +1756,38 @@ void TurboAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) {
shrq(dst, Immediate(32));
}
// Emits vpextrw when AVX is supported; otherwise emits pextrw inside an
// SSE4_1 feature scope (SSE4_1 support is asserted in debug builds).
void TurboAssembler::Pextrw(Register dst, XMMRegister src, int8_t imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    // Non-AVX path: the plain encoding is emitted under SSE4_1.
    DCHECK(CpuFeatures::IsSupported(SSE4_1));
    CpuFeatureScope sse_scope(this, SSE4_1);
    pextrw(dst, src, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpextrw(dst, src, imm8);
}
// Emits vpextrb when AVX is supported; otherwise emits pextrb inside an
// SSE4_1 feature scope (SSE4_1 support is asserted in debug builds).
void TurboAssembler::Pextrb(Register dst, XMMRegister src, int8_t imm8) {
  const bool has_avx = CpuFeatures::IsSupported(AVX);
  if (has_avx) {
    CpuFeatureScope avx_scope(this, AVX);
    vpextrb(dst, src, imm8);
    return;
  }
  // Non-AVX path: the plain encoding is emitted under SSE4_1.
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  CpuFeatureScope sse_scope(this, SSE4_1);
  pextrb(dst, src, imm8);
}
void TurboAssembler::Pinsrd(XMMRegister dst, Register src, int8_t imm8) {
if (CpuFeatures::IsSupported(SSE4_1)) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpinsrd(dst, dst, src, imm8);
return;
} else if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
pinsrd(dst, src, imm8);
return;
@ -1768,7 +1802,11 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Register src, int8_t imm8) {
}
void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, int8_t imm8) {
if (CpuFeatures::IsSupported(SSE4_1)) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpinsrd(dst, dst, src, imm8);
return;
} else if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
pinsrd(dst, src, imm8);
return;
@ -1782,6 +1820,56 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, int8_t imm8) {
}
}
// Register-source overload. Emits vpinsrw when AVX is supported; otherwise
// emits pinsrw inside an SSE4_1 feature scope (asserted in debug builds).
void TurboAssembler::Pinsrw(XMMRegister dst, Register src, int8_t imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    DCHECK(CpuFeatures::IsSupported(SSE4_1));
    CpuFeatureScope sse_scope(this, SSE4_1);
    pinsrw(dst, src, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  // dst is passed as both the destination and the first source operand.
  vpinsrw(dst, dst, src, imm8);
}
// Operand-source overload. Emits vpinsrw when AVX is supported; otherwise
// emits pinsrw inside an SSE4_1 feature scope.
void TurboAssembler::Pinsrw(XMMRegister dst, Operand src, int8_t imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    // dst is passed as both the destination and the first source operand.
    vpinsrw(dst, dst, src, imm8);
    return;
  }
  // Assert the same precondition as the Register overload before opening
  // the SSE4_1 scope, so both overloads fail identically in debug builds.
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  CpuFeatureScope sse_scope(this, SSE4_1);
  pinsrw(dst, src, imm8);
}
// Register-source overload. Emits vpinsrb when AVX is supported; otherwise
// emits pinsrb inside an SSE4_1 feature scope (asserted in debug builds).
void TurboAssembler::Pinsrb(XMMRegister dst, Register src, int8_t imm8) {
  const bool has_avx = CpuFeatures::IsSupported(AVX);
  if (has_avx) {
    CpuFeatureScope avx_scope(this, AVX);
    // dst is passed as both the destination and the first source operand.
    vpinsrb(dst, dst, src, imm8);
    return;
  }
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  CpuFeatureScope sse_scope(this, SSE4_1);
  pinsrb(dst, src, imm8);
}
// Operand-source overload. Emits vpinsrb when AVX is supported; otherwise
// emits pinsrb inside an SSE4_1 feature scope.
void TurboAssembler::Pinsrb(XMMRegister dst, Operand src, int8_t imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    // dst is passed as both the destination and the first source operand.
    vpinsrb(dst, dst, src, imm8);
    return;
  }
  // Assert the same precondition as the Register overload before opening
  // the SSE4_1 scope, so both overloads fail identically in debug builds.
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  CpuFeatureScope sse_scope(this, SSE4_1);
  pinsrb(dst, src, imm8);
}
void TurboAssembler::Psllq(XMMRegister dst, byte imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);

View File

@ -152,11 +152,12 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Roundsd, roundsd)
AVX_OP(Sqrtss, sqrtss)
AVX_OP(Sqrtsd, sqrtsd)
AVX_OP(Sqrtpd, sqrtpd)
AVX_OP(Ucomiss, ucomiss)
AVX_OP(Ucomisd, ucomisd)
AVX_OP(Pshufb, pshufb)
AVX_OP(Paddusb, paddusb)
AVX_OP(Sqrtpd, sqrtpd)
AVX_OP(Por, por)
#undef AVX_OP
@ -370,8 +371,14 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
// Non-SSE2 instructions.
// Pextrd: the definition emits vpextrd when AVX is supported, else pextrd
// under SSE4_1; it has further paths that are defined in the .cc file.
void Pextrd(Register dst, XMMRegister src, int8_t imm8);
// Pextrw / Pextrb: emit vpextrw / vpextrb when AVX is supported, otherwise
// pextrw / pextrb inside an SSE4_1 feature scope.
void Pextrw(Register dst, XMMRegister src, int8_t imm8);
void Pextrb(Register dst, XMMRegister src, int8_t imm8);
// Pinsrd: the definitions emit vpinsrd when AVX is supported, else pinsrd
// under SSE4_1; remaining paths are defined in the .cc file.
void Pinsrd(XMMRegister dst, Register src, int8_t imm8);
void Pinsrd(XMMRegister dst, Operand src, int8_t imm8);
// Pinsrw / Pinsrb: emit vpinsrw / vpinsrb (with dst as both destination and
// first source) when AVX is supported, otherwise pinsrw / pinsrb inside an
// SSE4_1 feature scope.
void Pinsrw(XMMRegister dst, Register src, int8_t imm8);
void Pinsrw(XMMRegister dst, Operand src, int8_t imm8);
void Pinsrb(XMMRegister dst, Register src, int8_t imm8);
void Pinsrb(XMMRegister dst, Operand src, int8_t imm8);
void Psllq(XMMRegister dst, byte imm8);
void Psrlq(XMMRegister dst, byte imm8);

View File

@ -2884,11 +2884,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64I32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) {
__ movd(dst, i.InputRegister(0));
__ Movd(dst, i.InputRegister(0));
} else {
__ movd(dst, i.InputOperand(0));
__ Movd(dst, i.InputOperand(0));
}
__ pshufd(dst, dst, 0x0);
__ Pshufd(dst, dst, 0x0);
break;
}
case kX64I32x4ExtractLane: {
@ -3117,16 +3117,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64I16x8ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
Register dst = i.OutputRegister();
__ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
__ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kX64I16x8ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (HasRegisterInput(instr, 2)) {
__ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
__ Pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
i.InputInt8(1));
} else {
__ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
__ Pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
}
break;
}
@ -3311,27 +3311,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope(tasm(), SSSE3);
XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) {
__ movd(dst, i.InputRegister(0));
__ Movd(dst, i.InputRegister(0));
} else {
__ movd(dst, i.InputOperand(0));
__ Movd(dst, i.InputOperand(0));
}
__ xorps(kScratchDoubleReg, kScratchDoubleReg);
__ pshufb(dst, kScratchDoubleReg);
__ Xorps(kScratchDoubleReg, kScratchDoubleReg);
__ Pshufb(dst, kScratchDoubleReg);
break;
}
case kX64I8x16ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
Register dst = i.OutputRegister();
__ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
__ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kX64I8x16ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (HasRegisterInput(instr, 2)) {
__ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
__ Pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
i.InputInt8(1));
} else {
__ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
__ Pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
}
break;
}
@ -3509,7 +3509,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I8x16AddSaturateU: {
__ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I8x16SubSaturateU: {
@ -3574,10 +3574,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64S128Select: {
// Mask used here is stored in dst.
XMMRegister dst = i.OutputSimd128Register();
__ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
__ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
__ andps(dst, kScratchDoubleReg);
__ xorps(dst, i.InputSimd128Register(2));
__ Movaps(kScratchDoubleReg, i.InputSimd128Register(1));
__ Xorps(kScratchDoubleReg, i.InputSimd128Register(2));
__ Andps(dst, kScratchDoubleReg);
__ Xorps(dst, i.InputSimd128Register(2));
break;
}
case kX64S8x16Swizzle: {
@ -3608,10 +3608,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SetupShuffleMaskOnStack(tasm(), mask);
__ pshufb(dst, Operand(rsp, 0));
__ Pshufb(dst, Operand(rsp, 0));
} else { // two input operands
DCHECK_EQ(6, instr->InputCount());
ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 0);
uint32_t mask[4] = {};
for (int j = 5; j > 1; j--) {
uint32_t lanes = i.InputUint32(j);
@ -3621,13 +3621,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
}
SetupShuffleMaskOnStack(tasm(), mask);
__ pshufb(kScratchDoubleReg, Operand(rsp, 0));
__ Pshufb(kScratchDoubleReg, Operand(rsp, 0));
uint32_t mask1[4] = {};
if (instr->InputAt(1)->IsSimd128Register()) {
XMMRegister src1 = i.InputSimd128Register(1);
if (src1 != dst) __ movups(dst, src1);
} else {
__ movups(dst, i.InputOperand(1));
__ Movups(dst, i.InputOperand(1));
}
for (int j = 5; j > 1; j--) {
uint32_t lanes = i.InputUint32(j);
@ -3637,8 +3637,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
}
SetupShuffleMaskOnStack(tasm(), mask1);
__ pshufb(dst, Operand(rsp, 0));
__ por(dst, kScratchDoubleReg);
__ Pshufb(dst, Operand(rsp, 0));
__ Por(dst, kScratchDoubleReg);
}
__ movq(rsp, tmp);
break;