[wasm-simd][ia32] Consolidate some SSE/AVX i8x16 opcodes

Bug: v8:11217
Change-Id: I6e61b11babc0baecf7b1982ef779b941d3344182
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2667971
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72493}
This commit is contained in:
Ng Zhi An 2021-02-02 09:58:15 -08:00 committed by Commit Bot
parent d913f0461d
commit 3fca8f85c9
5 changed files with 84 additions and 170 deletions

View File

@ -442,6 +442,17 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Pavgb, pavgb)
AVX_PACKED_OP3(Pavgw, pavgw)
AVX_PACKED_OP3(Pand, pand)
AVX_PACKED_OP3(Pminub, pminub)
AVX_PACKED_OP3(Pmaxub, pmaxub)
AVX_PACKED_OP3(Paddusb, paddusb)
AVX_PACKED_OP3(Psubusb, psubusb)
AVX_PACKED_OP3(Pcmpgtb, pcmpgtb)
AVX_PACKED_OP3(Pcmpeqb, pcmpeqb)
AVX_PACKED_OP3(Paddb, paddb)
AVX_PACKED_OP3(Paddsb, paddsb)
AVX_PACKED_OP3(Psubb, psubb)
AVX_PACKED_OP3(Psubsb, psubsb)
#undef AVX_PACKED_OP3
AVX_PACKED_OP3_WITH_TYPE(Psllw, psllw, XMMRegister, uint8_t)
@ -527,6 +538,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP3_XO_SSE4(Pmaxsd, pmaxsd)
AVX_OP3_WITH_TYPE_SCOPE(Pmaddubsw, pmaddubsw, XMMRegister, XMMRegister, SSSE3)
AVX_OP3_XO_SSE4(Pminsb, pminsb)
AVX_OP3_XO_SSE4(Pmaxsb, pmaxsb)
#undef AVX_OP3_XO_SSE4
#undef AVX_OP3_WITH_TYPE_SCOPE

View File

@ -3409,50 +3409,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kSSEI8x16Add: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ paddb(i.OutputSimd128Register(), i.InputOperand(1));
case kIA32I8x16Add: {
__ Paddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kAVXI8x16Add: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpaddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
case kIA32I8x16AddSatS: {
__ Paddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI8x16AddSatS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ paddsb(i.OutputSimd128Register(), i.InputOperand(1));
case kIA32I8x16Sub: {
__ Psubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kAVXI8x16AddSatS: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpaddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI8x16Sub: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ psubb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16Sub: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpsubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
case kIA32I8x16SubSatS: {
__ Psubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI8x16SubSatS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ psubsb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16SubSatS: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpsubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI8x16Mul: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
@ -3532,41 +3508,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpor(dst, dst, tmp);
break;
}
case kSSEI8x16MinS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pminsb(i.OutputSimd128Register(), i.InputOperand(1));
case kIA32I8x16MinS: {
__ Pminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kAVXI8x16MinS: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
case kIA32I8x16MaxS: {
__ Pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kIA32I8x16Eq: {
__ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI8x16MaxS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pmaxsb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16MaxS: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI8x16Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16Eq: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI8x16Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
@ -3583,15 +3539,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kScratchDoubleReg);
break;
}
case kSSEI8x16GtS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ pcmpgtb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16GtS: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
case kIA32I8x16GtS: {
__ Pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI8x16GeS: {
@ -3625,26 +3575,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpackuswb(dst, dst, i.InputOperand(1));
break;
}
case kSSEI8x16AddSatU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ paddusb(i.OutputSimd128Register(), i.InputOperand(1));
case kIA32I8x16AddSatU: {
__ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kAVXI8x16AddSatU: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpaddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI8x16SubSatU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ psubusb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16SubSatU: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpsubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
case kIA32I8x16SubSatU: {
__ Psubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kIA32I8x16ShrU: {
@ -3679,27 +3617,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kSSEI8x16MinU: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
__ pminub(dst, i.InputOperand(1));
case kIA32I8x16MinU: {
__ Pminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kAVXI8x16MinU: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI8x16MaxU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ pmaxub(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16MaxU: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
case kIA32I8x16MaxU: {
__ Pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI8x16GtU: {

View File

@ -313,39 +313,27 @@ namespace compiler {
V(IA32I8x16Neg) \
V(IA32I8x16Shl) \
V(IA32I8x16ShrS) \
V(SSEI8x16Add) \
V(AVXI8x16Add) \
V(SSEI8x16AddSatS) \
V(AVXI8x16AddSatS) \
V(SSEI8x16Sub) \
V(AVXI8x16Sub) \
V(SSEI8x16SubSatS) \
V(AVXI8x16SubSatS) \
V(IA32I8x16Add) \
V(IA32I8x16AddSatS) \
V(IA32I8x16Sub) \
V(IA32I8x16SubSatS) \
V(SSEI8x16Mul) \
V(AVXI8x16Mul) \
V(SSEI8x16MinS) \
V(AVXI8x16MinS) \
V(SSEI8x16MaxS) \
V(AVXI8x16MaxS) \
V(SSEI8x16Eq) \
V(AVXI8x16Eq) \
V(IA32I8x16MinS) \
V(IA32I8x16MaxS) \
V(IA32I8x16Eq) \
V(SSEI8x16Ne) \
V(AVXI8x16Ne) \
V(SSEI8x16GtS) \
V(AVXI8x16GtS) \
V(IA32I8x16GtS) \
V(SSEI8x16GeS) \
V(AVXI8x16GeS) \
V(SSEI8x16UConvertI16x8) \
V(AVXI8x16UConvertI16x8) \
V(SSEI8x16AddSatU) \
V(AVXI8x16AddSatU) \
V(SSEI8x16SubSatU) \
V(AVXI8x16SubSatU) \
V(IA32I8x16AddSatU) \
V(IA32I8x16SubSatU) \
V(IA32I8x16ShrU) \
V(SSEI8x16MinU) \
V(AVXI8x16MinU) \
V(SSEI8x16MaxU) \
V(AVXI8x16MaxU) \
V(IA32I8x16MinU) \
V(IA32I8x16MaxU) \
V(SSEI8x16GtU) \
V(AVXI8x16GtU) \
V(SSEI8x16GeU) \

View File

@ -295,39 +295,27 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I8x16Neg:
case kIA32I8x16Shl:
case kIA32I8x16ShrS:
case kSSEI8x16Add:
case kAVXI8x16Add:
case kSSEI8x16AddSatS:
case kAVXI8x16AddSatS:
case kSSEI8x16Sub:
case kAVXI8x16Sub:
case kSSEI8x16SubSatS:
case kAVXI8x16SubSatS:
case kIA32I8x16Add:
case kIA32I8x16AddSatS:
case kIA32I8x16Sub:
case kIA32I8x16SubSatS:
case kSSEI8x16Mul:
case kAVXI8x16Mul:
case kSSEI8x16MinS:
case kAVXI8x16MinS:
case kSSEI8x16MaxS:
case kAVXI8x16MaxS:
case kSSEI8x16Eq:
case kAVXI8x16Eq:
case kIA32I8x16MinS:
case kIA32I8x16MaxS:
case kIA32I8x16Eq:
case kSSEI8x16Ne:
case kAVXI8x16Ne:
case kSSEI8x16GtS:
case kAVXI8x16GtS:
case kIA32I8x16GtS:
case kSSEI8x16GeS:
case kAVXI8x16GeS:
case kSSEI8x16UConvertI16x8:
case kAVXI8x16UConvertI16x8:
case kSSEI8x16AddSatU:
case kAVXI8x16AddSatU:
case kSSEI8x16SubSatU:
case kAVXI8x16SubSatU:
case kIA32I8x16AddSatU:
case kIA32I8x16SubSatU:
case kIA32I8x16ShrU:
case kSSEI8x16MinU:
case kAVXI8x16MinU:
case kSSEI8x16MaxU:
case kAVXI8x16MaxU:
case kIA32I8x16MinU:
case kIA32I8x16MaxU:
case kSSEI8x16GtU:
case kAVXI8x16GtU:
case kSSEI8x16GeU:

View File

@ -2256,20 +2256,8 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I16x8GtU) \
V(I16x8GeU) \
V(I8x16SConvertI16x8) \
V(I8x16Add) \
V(I8x16AddSatS) \
V(I8x16Sub) \
V(I8x16SubSatS) \
V(I8x16MinS) \
V(I8x16MaxS) \
V(I8x16Eq) \
V(I8x16Ne) \
V(I8x16GtS) \
V(I8x16GeS) \
V(I8x16AddSatU) \
V(I8x16SubSatU) \
V(I8x16MinU) \
V(I8x16MaxU) \
V(I8x16GtU) \
V(I8x16GeU) \
V(S128And) \
@ -2287,6 +2275,18 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I64x2Eq) \
V(I32x4DotI16x8S) \
V(I16x8RoundingAverageU) \
V(I8x16Add) \
V(I8x16AddSatS) \
V(I8x16Sub) \
V(I8x16SubSatS) \
V(I8x16MinS) \
V(I8x16MaxS) \
V(I8x16Eq) \
V(I8x16GtS) \
V(I8x16AddSatU) \
V(I8x16SubSatU) \
V(I8x16MinU) \
V(I8x16MaxU) \
V(I8x16RoundingAverageU)
// These opcodes require all inputs to be registers because the codegen is