[wasm-simd][ia32] Consolidate SSE/AVX opcodes for SIMD unops
Bug: v8:11217
Change-Id: Ic58b0ac90fa227cadc35829bd1e5629f6749020a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2616102
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72083}
This commit is contained in:
parent
8b77fd9df1
commit
31bf056fbf
@ -2435,36 +2435,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Addps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
|
||||
break;
|
||||
}
|
||||
case kSSEF32x4Abs: {
|
||||
case kIA32F32x4Abs: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
DCHECK_EQ(i.InputSimd128Register(0), dst);
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ psrld(kScratchDoubleReg, 1);
|
||||
__ andps(dst, kScratchDoubleReg);
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
if (dst == src) {
|
||||
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ Psrld(kScratchDoubleReg, kScratchDoubleReg, 1);
|
||||
__ Andps(dst, kScratchDoubleReg);
|
||||
} else {
|
||||
__ Pcmpeqd(dst, dst);
|
||||
__ Psrld(dst, dst, 1);
|
||||
__ Andps(dst, src);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kAVXF32x4Abs: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
|
||||
__ vandps(i.OutputSimd128Register(), kScratchDoubleReg,
|
||||
i.InputOperand(0));
|
||||
break;
|
||||
}
|
||||
case kSSEF32x4Neg: {
|
||||
case kIA32F32x4Neg: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ pslld(kScratchDoubleReg, 31);
|
||||
__ xorps(dst, kScratchDoubleReg);
|
||||
break;
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
if (dst == src) {
|
||||
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ Pslld(kScratchDoubleReg, kScratchDoubleReg, 31);
|
||||
__ Xorps(dst, kScratchDoubleReg);
|
||||
} else {
|
||||
__ Pcmpeqd(dst, dst);
|
||||
__ Pslld(dst, dst, 31);
|
||||
__ Xorps(dst, src);
|
||||
}
|
||||
case kAVXF32x4Neg: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31);
|
||||
__ vxorps(i.OutputSimd128Register(), kScratchDoubleReg,
|
||||
i.InputOperand(0));
|
||||
break;
|
||||
}
|
||||
case kIA32F32x4Sqrt: {
|
||||
@ -3824,17 +3820,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Pcmpeqd(dst, dst);
|
||||
break;
|
||||
}
|
||||
case kSSES128Not: {
|
||||
case kIA32S128Not: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ pxor(dst, kScratchDoubleReg);
|
||||
break;
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
if (dst == src) {
|
||||
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ Pxor(dst, kScratchDoubleReg);
|
||||
} else {
|
||||
__ Pcmpeqd(dst, dst);
|
||||
__ Pxor(dst, src);
|
||||
}
|
||||
case kAVXS128Not: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ vpxor(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0));
|
||||
break;
|
||||
}
|
||||
case kSSES128And: {
|
||||
|
@ -163,10 +163,8 @@ namespace compiler {
|
||||
V(IA32Insertps) \
|
||||
V(IA32F32x4SConvertI32x4) \
|
||||
V(IA32F32x4UConvertI32x4) \
|
||||
V(SSEF32x4Abs) \
|
||||
V(AVXF32x4Abs) \
|
||||
V(SSEF32x4Neg) \
|
||||
V(AVXF32x4Neg) \
|
||||
V(IA32F32x4Abs) \
|
||||
V(IA32F32x4Neg) \
|
||||
V(IA32F32x4Sqrt) \
|
||||
V(IA32F32x4RecipApprox) \
|
||||
V(IA32F32x4RecipSqrtApprox) \
|
||||
@ -359,8 +357,7 @@ namespace compiler {
|
||||
V(IA32S128Const) \
|
||||
V(IA32S128Zero) \
|
||||
V(IA32S128AllOnes) \
|
||||
V(SSES128Not) \
|
||||
V(AVXS128Not) \
|
||||
V(IA32S128Not) \
|
||||
V(SSES128And) \
|
||||
V(AVXS128And) \
|
||||
V(SSES128Or) \
|
||||
|
@ -145,10 +145,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32Insertps:
|
||||
case kIA32F32x4SConvertI32x4:
|
||||
case kIA32F32x4UConvertI32x4:
|
||||
case kSSEF32x4Abs:
|
||||
case kAVXF32x4Abs:
|
||||
case kSSEF32x4Neg:
|
||||
case kAVXF32x4Neg:
|
||||
case kIA32F32x4Abs:
|
||||
case kIA32F32x4Neg:
|
||||
case kIA32F32x4Sqrt:
|
||||
case kIA32F32x4RecipApprox:
|
||||
case kIA32F32x4RecipSqrtApprox:
|
||||
@ -341,8 +339,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32S128Const:
|
||||
case kIA32S128Zero:
|
||||
case kIA32S128AllOnes:
|
||||
case kSSES128Not:
|
||||
case kAVXS128Not:
|
||||
case kIA32S128Not:
|
||||
case kSSES128And:
|
||||
case kAVXS128And:
|
||||
case kSSES128Or:
|
||||
|
@ -2279,6 +2279,8 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
|
||||
V(I16x8ExtMulHighI8x16U)
|
||||
|
||||
#define SIMD_UNOP_LIST(V) \
|
||||
V(F32x4Abs) \
|
||||
V(F32x4Neg) \
|
||||
V(F32x4Sqrt) \
|
||||
V(F32x4SConvertI32x4) \
|
||||
V(F32x4RecipApprox) \
|
||||
@ -2303,11 +2305,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
|
||||
V(I16x8Abs) \
|
||||
V(I8x16Neg) \
|
||||
V(I8x16Abs) \
|
||||
V(I8x16BitMask)
|
||||
|
||||
#define SIMD_UNOP_PREFIX_LIST(V) \
|
||||
V(F32x4Abs) \
|
||||
V(F32x4Neg) \
|
||||
V(I8x16BitMask) \
|
||||
V(S128Not)
|
||||
|
||||
#define SIMD_ANYTRUE_LIST(V) \
|
||||
@ -2612,25 +2610,6 @@ SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
|
||||
#undef VISIT_SIMD_UNOP
|
||||
#undef SIMD_UNOP_LIST
|
||||
|
||||
// TODO(v8:9198): SSE instructions that read 16 bytes from memory require the
|
||||
// operand to be 16-byte aligned. AVX instructions relax this requirement, but
|
||||
// might have reduced performance if the memory crosses cache line. But since we
|
||||
// have limited xmm registers, this might be okay to alleviate register
|
||||
// pressure.
|
||||
#define VISIT_SIMD_UNOP_PREFIX(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
IA32OperandGenerator g(this); \
|
||||
if (IsSupported(AVX)) { \
|
||||
Emit(kAVX##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0))); \
|
||||
} else { \
|
||||
Emit(kSSE##Opcode, g.DefineSameAsFirst(node), \
|
||||
g.UseRegister(node->InputAt(0))); \
|
||||
} \
|
||||
}
|
||||
SIMD_UNOP_PREFIX_LIST(VISIT_SIMD_UNOP_PREFIX)
|
||||
#undef VISIT_SIMD_UNOP_PREFIX
|
||||
#undef SIMD_UNOP_PREFIX_LIST
|
||||
|
||||
// The implementation of AnyTrue is the same for all shapes.
|
||||
#define VISIT_SIMD_ANYTRUE(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
|
Loading…
Reference in New Issue
Block a user