[wasm-simd][ia32] Consolidate SSE/AVX opcodes for SIMD unops

Bug: v8:11217
Change-Id: Ic58b0ac90fa227cadc35829bd1e5629f6749020a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2616102
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72083}
This commit is contained in:
Deepti Gandluri 2021-01-13 09:25:25 -08:00 committed by Commit Bot
parent 8b77fd9df1
commit 31bf056fbf
4 changed files with 40 additions and 72 deletions

View File

@ -2435,36 +2435,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Addps(dst, dst, kScratchDoubleReg); // add hi and lo, may round. __ Addps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
break; break;
} }
case kSSEF32x4Abs: { case kIA32F32x4Abs: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(i.InputSimd128Register(0), dst); XMMRegister src = i.InputSimd128Register(0);
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); if (dst == src) {
__ psrld(kScratchDoubleReg, 1); __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ andps(dst, kScratchDoubleReg); __ Psrld(kScratchDoubleReg, kScratchDoubleReg, 1);
__ Andps(dst, kScratchDoubleReg);
} else {
__ Pcmpeqd(dst, dst);
__ Psrld(dst, dst, 1);
__ Andps(dst, src);
}
break; break;
} }
case kAVXF32x4Abs: { case kIA32F32x4Neg: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
__ vandps(i.OutputSimd128Register(), kScratchDoubleReg,
i.InputOperand(0));
break;
}
case kSSEF32x4Neg: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0)); XMMRegister src = i.InputSimd128Register(0);
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); if (dst == src) {
__ pslld(kScratchDoubleReg, 31); __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ xorps(dst, kScratchDoubleReg); __ Pslld(kScratchDoubleReg, kScratchDoubleReg, 31);
break; __ Xorps(dst, kScratchDoubleReg);
} } else {
case kAVXF32x4Neg: { __ Pcmpeqd(dst, dst);
CpuFeatureScope avx_scope(tasm(), AVX); __ Pslld(dst, dst, 31);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ Xorps(dst, src);
__ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31); }
__ vxorps(i.OutputSimd128Register(), kScratchDoubleReg,
i.InputOperand(0));
break; break;
} }
case kIA32F32x4Sqrt: { case kIA32F32x4Sqrt: {
@ -3824,17 +3820,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pcmpeqd(dst, dst); __ Pcmpeqd(dst, dst);
break; break;
} }
case kSSES128Not: { case kIA32S128Not: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0)); XMMRegister src = i.InputSimd128Register(0);
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); if (dst == src) {
__ pxor(dst, kScratchDoubleReg); __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
break; __ Pxor(dst, kScratchDoubleReg);
} } else {
case kAVXS128Not: { __ Pcmpeqd(dst, dst);
CpuFeatureScope avx_scope(tasm(), AVX); __ Pxor(dst, src);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); }
__ vpxor(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0));
break; break;
} }
case kSSES128And: { case kSSES128And: {

View File

@ -163,10 +163,8 @@ namespace compiler {
V(IA32Insertps) \ V(IA32Insertps) \
V(IA32F32x4SConvertI32x4) \ V(IA32F32x4SConvertI32x4) \
V(IA32F32x4UConvertI32x4) \ V(IA32F32x4UConvertI32x4) \
V(SSEF32x4Abs) \ V(IA32F32x4Abs) \
V(AVXF32x4Abs) \ V(IA32F32x4Neg) \
V(SSEF32x4Neg) \
V(AVXF32x4Neg) \
V(IA32F32x4Sqrt) \ V(IA32F32x4Sqrt) \
V(IA32F32x4RecipApprox) \ V(IA32F32x4RecipApprox) \
V(IA32F32x4RecipSqrtApprox) \ V(IA32F32x4RecipSqrtApprox) \
@ -359,8 +357,7 @@ namespace compiler {
V(IA32S128Const) \ V(IA32S128Const) \
V(IA32S128Zero) \ V(IA32S128Zero) \
V(IA32S128AllOnes) \ V(IA32S128AllOnes) \
V(SSES128Not) \ V(IA32S128Not) \
V(AVXS128Not) \
V(SSES128And) \ V(SSES128And) \
V(AVXS128And) \ V(AVXS128And) \
V(SSES128Or) \ V(SSES128Or) \

View File

@ -145,10 +145,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32Insertps: case kIA32Insertps:
case kIA32F32x4SConvertI32x4: case kIA32F32x4SConvertI32x4:
case kIA32F32x4UConvertI32x4: case kIA32F32x4UConvertI32x4:
case kSSEF32x4Abs: case kIA32F32x4Abs:
case kAVXF32x4Abs: case kIA32F32x4Neg:
case kSSEF32x4Neg:
case kAVXF32x4Neg:
case kIA32F32x4Sqrt: case kIA32F32x4Sqrt:
case kIA32F32x4RecipApprox: case kIA32F32x4RecipApprox:
case kIA32F32x4RecipSqrtApprox: case kIA32F32x4RecipSqrtApprox:
@ -341,8 +339,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32S128Const: case kIA32S128Const:
case kIA32S128Zero: case kIA32S128Zero:
case kIA32S128AllOnes: case kIA32S128AllOnes:
case kSSES128Not: case kIA32S128Not:
case kAVXS128Not:
case kSSES128And: case kSSES128And:
case kAVXS128And: case kAVXS128And:
case kSSES128Or: case kSSES128Or:

View File

@ -2279,6 +2279,8 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I16x8ExtMulHighI8x16U) V(I16x8ExtMulHighI8x16U)
#define SIMD_UNOP_LIST(V) \ #define SIMD_UNOP_LIST(V) \
V(F32x4Abs) \
V(F32x4Neg) \
V(F32x4Sqrt) \ V(F32x4Sqrt) \
V(F32x4SConvertI32x4) \ V(F32x4SConvertI32x4) \
V(F32x4RecipApprox) \ V(F32x4RecipApprox) \
@ -2303,11 +2305,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I16x8Abs) \ V(I16x8Abs) \
V(I8x16Neg) \ V(I8x16Neg) \
V(I8x16Abs) \ V(I8x16Abs) \
V(I8x16BitMask) V(I8x16BitMask) \
#define SIMD_UNOP_PREFIX_LIST(V) \
V(F32x4Abs) \
V(F32x4Neg) \
V(S128Not) V(S128Not)
#define SIMD_ANYTRUE_LIST(V) \ #define SIMD_ANYTRUE_LIST(V) \
@ -2612,25 +2610,6 @@ SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
#undef VISIT_SIMD_UNOP #undef VISIT_SIMD_UNOP
#undef SIMD_UNOP_LIST #undef SIMD_UNOP_LIST
// TODO(v8:9198): SSE instructions that read 16 bytes from memory require the
// operand to be 16-byte aligned. AVX instructions relax this requirement, but
// may perform worse when the memory access crosses a cache line. Since we
// have a limited number of xmm registers, using a memory operand might still
// be acceptable as a way to alleviate register pressure.
#define VISIT_SIMD_UNOP_PREFIX(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
IA32OperandGenerator g(this); \
if (IsSupported(AVX)) { \
Emit(kAVX##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0))); \
} else { \
Emit(kSSE##Opcode, g.DefineSameAsFirst(node), \
g.UseRegister(node->InputAt(0))); \
} \
}
SIMD_UNOP_PREFIX_LIST(VISIT_SIMD_UNOP_PREFIX)
#undef VISIT_SIMD_UNOP_PREFIX
#undef SIMD_UNOP_PREFIX_LIST
// The implementation of AnyTrue is the same for all shapes. // The implementation of AnyTrue is the same for all shapes.
#define VISIT_SIMD_ANYTRUE(Opcode) \ #define VISIT_SIMD_ANYTRUE(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \ void InstructionSelector::Visit##Opcode(Node* node) { \