[wasm-simd][ia32] Consolidate SSE/AVX opcodes for SIMD unops

Bug: v8:11217
Change-Id: Ic58b0ac90fa227cadc35829bd1e5629f6749020a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2616102
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72083}
This commit is contained in:
Deepti Gandluri 2021-01-13 09:25:25 -08:00 committed by Commit Bot
parent 8b77fd9df1
commit 31bf056fbf
4 changed files with 40 additions and 72 deletions

View File

@ -2435,36 +2435,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Addps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
break;
}
case kSSEF32x4Abs: {
case kIA32F32x4Abs: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(i.InputSimd128Register(0), dst);
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psrld(kScratchDoubleReg, 1);
__ andps(dst, kScratchDoubleReg);
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psrld(kScratchDoubleReg, kScratchDoubleReg, 1);
__ Andps(dst, kScratchDoubleReg);
} else {
__ Pcmpeqd(dst, dst);
__ Psrld(dst, dst, 1);
__ Andps(dst, src);
}
break;
}
case kAVXF32x4Abs: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
__ vandps(i.OutputSimd128Register(), kScratchDoubleReg,
i.InputOperand(0));
break;
}
case kSSEF32x4Neg: {
case kIA32F32x4Neg: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pslld(kScratchDoubleReg, 31);
__ xorps(dst, kScratchDoubleReg);
break;
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Pslld(kScratchDoubleReg, kScratchDoubleReg, 31);
__ Xorps(dst, kScratchDoubleReg);
} else {
__ Pcmpeqd(dst, dst);
__ Pslld(dst, dst, 31);
__ Xorps(dst, src);
}
case kAVXF32x4Neg: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31);
__ vxorps(i.OutputSimd128Register(), kScratchDoubleReg,
i.InputOperand(0));
break;
}
case kIA32F32x4Sqrt: {
@ -3824,17 +3820,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pcmpeqd(dst, dst);
break;
}
case kSSES128Not: {
case kIA32S128Not: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pxor(dst, kScratchDoubleReg);
break;
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Pxor(dst, kScratchDoubleReg);
} else {
__ Pcmpeqd(dst, dst);
__ Pxor(dst, src);
}
case kAVXS128Not: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpxor(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0));
break;
}
case kSSES128And: {

View File

@ -163,10 +163,8 @@ namespace compiler {
V(IA32Insertps) \
V(IA32F32x4SConvertI32x4) \
V(IA32F32x4UConvertI32x4) \
V(SSEF32x4Abs) \
V(AVXF32x4Abs) \
V(SSEF32x4Neg) \
V(AVXF32x4Neg) \
V(IA32F32x4Abs) \
V(IA32F32x4Neg) \
V(IA32F32x4Sqrt) \
V(IA32F32x4RecipApprox) \
V(IA32F32x4RecipSqrtApprox) \
@ -359,8 +357,7 @@ namespace compiler {
V(IA32S128Const) \
V(IA32S128Zero) \
V(IA32S128AllOnes) \
V(SSES128Not) \
V(AVXS128Not) \
V(IA32S128Not) \
V(SSES128And) \
V(AVXS128And) \
V(SSES128Or) \

View File

@ -145,10 +145,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32Insertps:
case kIA32F32x4SConvertI32x4:
case kIA32F32x4UConvertI32x4:
case kSSEF32x4Abs:
case kAVXF32x4Abs:
case kSSEF32x4Neg:
case kAVXF32x4Neg:
case kIA32F32x4Abs:
case kIA32F32x4Neg:
case kIA32F32x4Sqrt:
case kIA32F32x4RecipApprox:
case kIA32F32x4RecipSqrtApprox:
@ -341,8 +339,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32S128Const:
case kIA32S128Zero:
case kIA32S128AllOnes:
case kSSES128Not:
case kAVXS128Not:
case kIA32S128Not:
case kSSES128And:
case kAVXS128And:
case kSSES128Or:

View File

@ -2279,6 +2279,8 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I16x8ExtMulHighI8x16U)
#define SIMD_UNOP_LIST(V) \
V(F32x4Abs) \
V(F32x4Neg) \
V(F32x4Sqrt) \
V(F32x4SConvertI32x4) \
V(F32x4RecipApprox) \
@ -2303,11 +2305,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I16x8Abs) \
V(I8x16Neg) \
V(I8x16Abs) \
V(I8x16BitMask)
#define SIMD_UNOP_PREFIX_LIST(V) \
V(F32x4Abs) \
V(F32x4Neg) \
V(I8x16BitMask) \
V(S128Not)
#define SIMD_ANYTRUE_LIST(V) \
@ -2612,25 +2610,6 @@ SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
#undef VISIT_SIMD_UNOP
#undef SIMD_UNOP_LIST
// TODO(v8:9198): SSE instructions that read 16 bytes from memory require the
// operand to be 16-byte aligned. AVX instructions relax this requirement, but
// might have reduced performance if the memory crosses cache line. But since we
// have limited xmm registers, this might be okay to alleviate register
// pressure.
#define VISIT_SIMD_UNOP_PREFIX(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
IA32OperandGenerator g(this); \
if (IsSupported(AVX)) { \
Emit(kAVX##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0))); \
} else { \
Emit(kSSE##Opcode, g.DefineSameAsFirst(node), \
g.UseRegister(node->InputAt(0))); \
} \
}
SIMD_UNOP_PREFIX_LIST(VISIT_SIMD_UNOP_PREFIX)
#undef VISIT_SIMD_UNOP_PREFIX
#undef SIMD_UNOP_PREFIX_LIST
// The implementation of AnyTrue is the same for all shapes.
#define VISIT_SIMD_ANYTRUE(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \