[wasm-simd][ia32] Consolidate SSE/AVX opcodes for SIMD unops
Bug: v8:11217
Change-Id: Ic58b0ac90fa227cadc35829bd1e5629f6749020a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2616102
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72083}
This commit is contained in:
parent
8b77fd9df1
commit
31bf056fbf
@ -2435,36 +2435,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
__ Addps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
|
__ Addps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kSSEF32x4Abs: {
|
case kIA32F32x4Abs: {
|
||||||
XMMRegister dst = i.OutputSimd128Register();
|
XMMRegister dst = i.OutputSimd128Register();
|
||||||
DCHECK_EQ(i.InputSimd128Register(0), dst);
|
XMMRegister src = i.InputSimd128Register(0);
|
||||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
if (dst == src) {
|
||||||
__ psrld(kScratchDoubleReg, 1);
|
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||||
__ andps(dst, kScratchDoubleReg);
|
__ Psrld(kScratchDoubleReg, kScratchDoubleReg, 1);
|
||||||
|
__ Andps(dst, kScratchDoubleReg);
|
||||||
|
} else {
|
||||||
|
__ Pcmpeqd(dst, dst);
|
||||||
|
__ Psrld(dst, dst, 1);
|
||||||
|
__ Andps(dst, src);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kAVXF32x4Abs: {
|
case kIA32F32x4Neg: {
|
||||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
|
||||||
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
|
||||||
__ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
|
|
||||||
__ vandps(i.OutputSimd128Register(), kScratchDoubleReg,
|
|
||||||
i.InputOperand(0));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case kSSEF32x4Neg: {
|
|
||||||
XMMRegister dst = i.OutputSimd128Register();
|
XMMRegister dst = i.OutputSimd128Register();
|
||||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
XMMRegister src = i.InputSimd128Register(0);
|
||||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
if (dst == src) {
|
||||||
__ pslld(kScratchDoubleReg, 31);
|
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||||
__ xorps(dst, kScratchDoubleReg);
|
__ Pslld(kScratchDoubleReg, kScratchDoubleReg, 31);
|
||||||
break;
|
__ Xorps(dst, kScratchDoubleReg);
|
||||||
|
} else {
|
||||||
|
__ Pcmpeqd(dst, dst);
|
||||||
|
__ Pslld(dst, dst, 31);
|
||||||
|
__ Xorps(dst, src);
|
||||||
}
|
}
|
||||||
case kAVXF32x4Neg: {
|
|
||||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
|
||||||
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
|
||||||
__ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31);
|
|
||||||
__ vxorps(i.OutputSimd128Register(), kScratchDoubleReg,
|
|
||||||
i.InputOperand(0));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kIA32F32x4Sqrt: {
|
case kIA32F32x4Sqrt: {
|
||||||
@ -3824,17 +3820,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
__ Pcmpeqd(dst, dst);
|
__ Pcmpeqd(dst, dst);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kSSES128Not: {
|
case kIA32S128Not: {
|
||||||
XMMRegister dst = i.OutputSimd128Register();
|
XMMRegister dst = i.OutputSimd128Register();
|
||||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
XMMRegister src = i.InputSimd128Register(0);
|
||||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
if (dst == src) {
|
||||||
__ pxor(dst, kScratchDoubleReg);
|
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||||
break;
|
__ Pxor(dst, kScratchDoubleReg);
|
||||||
|
} else {
|
||||||
|
__ Pcmpeqd(dst, dst);
|
||||||
|
__ Pxor(dst, src);
|
||||||
}
|
}
|
||||||
case kAVXS128Not: {
|
|
||||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
|
||||||
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
|
||||||
__ vpxor(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kSSES128And: {
|
case kSSES128And: {
|
||||||
|
@ -163,10 +163,8 @@ namespace compiler {
|
|||||||
V(IA32Insertps) \
|
V(IA32Insertps) \
|
||||||
V(IA32F32x4SConvertI32x4) \
|
V(IA32F32x4SConvertI32x4) \
|
||||||
V(IA32F32x4UConvertI32x4) \
|
V(IA32F32x4UConvertI32x4) \
|
||||||
V(SSEF32x4Abs) \
|
V(IA32F32x4Abs) \
|
||||||
V(AVXF32x4Abs) \
|
V(IA32F32x4Neg) \
|
||||||
V(SSEF32x4Neg) \
|
|
||||||
V(AVXF32x4Neg) \
|
|
||||||
V(IA32F32x4Sqrt) \
|
V(IA32F32x4Sqrt) \
|
||||||
V(IA32F32x4RecipApprox) \
|
V(IA32F32x4RecipApprox) \
|
||||||
V(IA32F32x4RecipSqrtApprox) \
|
V(IA32F32x4RecipSqrtApprox) \
|
||||||
@ -359,8 +357,7 @@ namespace compiler {
|
|||||||
V(IA32S128Const) \
|
V(IA32S128Const) \
|
||||||
V(IA32S128Zero) \
|
V(IA32S128Zero) \
|
||||||
V(IA32S128AllOnes) \
|
V(IA32S128AllOnes) \
|
||||||
V(SSES128Not) \
|
V(IA32S128Not) \
|
||||||
V(AVXS128Not) \
|
|
||||||
V(SSES128And) \
|
V(SSES128And) \
|
||||||
V(AVXS128And) \
|
V(AVXS128And) \
|
||||||
V(SSES128Or) \
|
V(SSES128Or) \
|
||||||
|
@ -145,10 +145,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
|||||||
case kIA32Insertps:
|
case kIA32Insertps:
|
||||||
case kIA32F32x4SConvertI32x4:
|
case kIA32F32x4SConvertI32x4:
|
||||||
case kIA32F32x4UConvertI32x4:
|
case kIA32F32x4UConvertI32x4:
|
||||||
case kSSEF32x4Abs:
|
case kIA32F32x4Abs:
|
||||||
case kAVXF32x4Abs:
|
case kIA32F32x4Neg:
|
||||||
case kSSEF32x4Neg:
|
|
||||||
case kAVXF32x4Neg:
|
|
||||||
case kIA32F32x4Sqrt:
|
case kIA32F32x4Sqrt:
|
||||||
case kIA32F32x4RecipApprox:
|
case kIA32F32x4RecipApprox:
|
||||||
case kIA32F32x4RecipSqrtApprox:
|
case kIA32F32x4RecipSqrtApprox:
|
||||||
@ -341,8 +339,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
|||||||
case kIA32S128Const:
|
case kIA32S128Const:
|
||||||
case kIA32S128Zero:
|
case kIA32S128Zero:
|
||||||
case kIA32S128AllOnes:
|
case kIA32S128AllOnes:
|
||||||
case kSSES128Not:
|
case kIA32S128Not:
|
||||||
case kAVXS128Not:
|
|
||||||
case kSSES128And:
|
case kSSES128And:
|
||||||
case kAVXS128And:
|
case kAVXS128And:
|
||||||
case kSSES128Or:
|
case kSSES128Or:
|
||||||
|
@ -2279,6 +2279,8 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
|
|||||||
V(I16x8ExtMulHighI8x16U)
|
V(I16x8ExtMulHighI8x16U)
|
||||||
|
|
||||||
#define SIMD_UNOP_LIST(V) \
|
#define SIMD_UNOP_LIST(V) \
|
||||||
|
V(F32x4Abs) \
|
||||||
|
V(F32x4Neg) \
|
||||||
V(F32x4Sqrt) \
|
V(F32x4Sqrt) \
|
||||||
V(F32x4SConvertI32x4) \
|
V(F32x4SConvertI32x4) \
|
||||||
V(F32x4RecipApprox) \
|
V(F32x4RecipApprox) \
|
||||||
@ -2303,11 +2305,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
|
|||||||
V(I16x8Abs) \
|
V(I16x8Abs) \
|
||||||
V(I8x16Neg) \
|
V(I8x16Neg) \
|
||||||
V(I8x16Abs) \
|
V(I8x16Abs) \
|
||||||
V(I8x16BitMask)
|
V(I8x16BitMask) \
|
||||||
|
|
||||||
#define SIMD_UNOP_PREFIX_LIST(V) \
|
|
||||||
V(F32x4Abs) \
|
|
||||||
V(F32x4Neg) \
|
|
||||||
V(S128Not)
|
V(S128Not)
|
||||||
|
|
||||||
#define SIMD_ANYTRUE_LIST(V) \
|
#define SIMD_ANYTRUE_LIST(V) \
|
||||||
@ -2612,25 +2610,6 @@ SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
|
|||||||
#undef VISIT_SIMD_UNOP
|
#undef VISIT_SIMD_UNOP
|
||||||
#undef SIMD_UNOP_LIST
|
#undef SIMD_UNOP_LIST
|
||||||
|
|
||||||
// TODO(v8:9198): SSE instructions that read 16 bytes from memory require the
|
|
||||||
// operand to be 16-byte aligned. AVX instructions relax this requirement, but
|
|
||||||
// might have reduced performance if the memory crosses cache line. But since we
|
|
||||||
// have limited xmm registers, this might be okay to alleviate register
|
|
||||||
// pressure.
|
|
||||||
#define VISIT_SIMD_UNOP_PREFIX(Opcode) \
|
|
||||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
|
||||||
IA32OperandGenerator g(this); \
|
|
||||||
if (IsSupported(AVX)) { \
|
|
||||||
Emit(kAVX##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0))); \
|
|
||||||
} else { \
|
|
||||||
Emit(kSSE##Opcode, g.DefineSameAsFirst(node), \
|
|
||||||
g.UseRegister(node->InputAt(0))); \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
SIMD_UNOP_PREFIX_LIST(VISIT_SIMD_UNOP_PREFIX)
|
|
||||||
#undef VISIT_SIMD_UNOP_PREFIX
|
|
||||||
#undef SIMD_UNOP_PREFIX_LIST
|
|
||||||
|
|
||||||
// The implementation of AnyTrue is the same for all shapes.
|
// The implementation of AnyTrue is the same for all shapes.
|
||||||
#define VISIT_SIMD_ANYTRUE(Opcode) \
|
#define VISIT_SIMD_ANYTRUE(Opcode) \
|
||||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||||
|
Loading…
Reference in New Issue
Block a user