From 31bf056fbf33b10ff1a986e10b7b92c9d6d1199c Mon Sep 17 00:00:00 2001 From: Deepti Gandluri Date: Wed, 13 Jan 2021 09:25:25 -0800 Subject: [PATCH] [wasm-simd][ia32] Consolidate SSE/AVX opcodes for SIMD unops Bug: v8:11217 Change-Id: Ic58b0ac90fa227cadc35829bd1e5629f6749020a Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2616102 Commit-Queue: Deepti Gandluri Reviewed-by: Zhi An Ng Cr-Commit-Position: refs/heads/master@{#72083} --- .../backend/ia32/code-generator-ia32.cc | 67 +++++++++---------- .../backend/ia32/instruction-codes-ia32.h | 9 +-- .../ia32/instruction-scheduler-ia32.cc | 9 +-- .../backend/ia32/instruction-selector-ia32.cc | 27 +------- 4 files changed, 40 insertions(+), 72 deletions(-) diff --git a/src/compiler/backend/ia32/code-generator-ia32.cc b/src/compiler/backend/ia32/code-generator-ia32.cc index a1bd712cf2..2de4c98329 100644 --- a/src/compiler/backend/ia32/code-generator-ia32.cc +++ b/src/compiler/backend/ia32/code-generator-ia32.cc @@ -2435,36 +2435,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Addps(dst, dst, kScratchDoubleReg); // add hi and lo, may round. break; } - case kSSEF32x4Abs: { + case kIA32F32x4Abs: { XMMRegister dst = i.OutputSimd128Register(); - DCHECK_EQ(i.InputSimd128Register(0), dst); - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psrld(kScratchDoubleReg, 1); - __ andps(dst, kScratchDoubleReg); + XMMRegister src = i.InputSimd128Register(0); + if (dst == src) { + __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); + __ Psrld(kScratchDoubleReg, kScratchDoubleReg, 1); + __ Andps(dst, kScratchDoubleReg); + } else { + __ Pcmpeqd(dst, dst); + __ Psrld(dst, dst, 1); + __ Andps(dst, src); + } break; } - case kAVXF32x4Abs: { - CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1); - __ vandps(i.OutputSimd128Register(), kScratchDoubleReg, - i.InputOperand(0)); - break; - } - case kSSEF32x4Neg: { + case kIA32F32x4Neg: { XMMRegister dst = i.OutputSimd128Register(); - DCHECK_EQ(dst, i.InputSimd128Register(0)); - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ pslld(kScratchDoubleReg, 31); - __ xorps(dst, kScratchDoubleReg); - break; - } - case kAVXF32x4Neg: { - CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31); - __ vxorps(i.OutputSimd128Register(), kScratchDoubleReg, - i.InputOperand(0)); + XMMRegister src = i.InputSimd128Register(0); + if (dst == src) { + __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); + __ Pslld(kScratchDoubleReg, kScratchDoubleReg, 31); + __ Xorps(dst, kScratchDoubleReg); + } else { + __ Pcmpeqd(dst, dst); + __ Pslld(dst, dst, 31); + __ Xorps(dst, src); + } break; } case kIA32F32x4Sqrt: { @@ -3824,17 +3820,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Pcmpeqd(dst, dst); break; } - case kSSES128Not: { + case kIA32S128Not: { XMMRegister dst = i.OutputSimd128Register(); - DCHECK_EQ(dst, i.InputSimd128Register(0)); - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(dst, kScratchDoubleReg); - break; - } - case kAVXS128Not: { - CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpxor(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0)); + XMMRegister src = i.InputSimd128Register(0); + if (dst == src) { + __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); + __ Pxor(dst, kScratchDoubleReg); + } else { + __ Pcmpeqd(dst, dst); + __ Pxor(dst, src); + } break; } case kSSES128And: { diff --git a/src/compiler/backend/ia32/instruction-codes-ia32.h b/src/compiler/backend/ia32/instruction-codes-ia32.h index 632eeace20..0830bd790f 100644 --- a/src/compiler/backend/ia32/instruction-codes-ia32.h +++ b/src/compiler/backend/ia32/instruction-codes-ia32.h @@ -163,10 +163,8 @@ namespace compiler { V(IA32Insertps) \ V(IA32F32x4SConvertI32x4) \ V(IA32F32x4UConvertI32x4) \ - V(SSEF32x4Abs) \ - V(AVXF32x4Abs) \ - V(SSEF32x4Neg) \ - V(AVXF32x4Neg) \ + V(IA32F32x4Abs) \ + V(IA32F32x4Neg) \ V(IA32F32x4Sqrt) \ V(IA32F32x4RecipApprox) \ V(IA32F32x4RecipSqrtApprox) \ @@ -359,8 +357,7 @@ namespace compiler { V(IA32S128Const) \ V(IA32S128Zero) \ V(IA32S128AllOnes) \ - V(SSES128Not) \ - V(AVXS128Not) \ + V(IA32S128Not) \ V(SSES128And) \ V(AVXS128And) \ V(SSES128Or) \ diff --git a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc index f82f299c5c..c80ae8ec30 100644 --- a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc +++ b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc @@ -145,10 +145,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kIA32Insertps: case kIA32F32x4SConvertI32x4: case kIA32F32x4UConvertI32x4: - case kSSEF32x4Abs: - case kAVXF32x4Abs: - case kSSEF32x4Neg: - case kAVXF32x4Neg: + case kIA32F32x4Abs: + case kIA32F32x4Neg: case kIA32F32x4Sqrt: case kIA32F32x4RecipApprox: case kIA32F32x4RecipSqrtApprox: @@ -341,8 +339,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kIA32S128Const: case kIA32S128Zero: case kIA32S128AllOnes: - case kSSES128Not: - case kAVXS128Not: + case kIA32S128Not: case kSSES128And: case kAVXS128And: case kSSES128Or: diff --git a/src/compiler/backend/ia32/instruction-selector-ia32.cc b/src/compiler/backend/ia32/instruction-selector-ia32.cc index 340e612f66..ac680bea4f 100644 --- a/src/compiler/backend/ia32/instruction-selector-ia32.cc +++ b/src/compiler/backend/ia32/instruction-selector-ia32.cc @@ -2279,6 +2279,8 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { V(I16x8ExtMulHighI8x16U) #define SIMD_UNOP_LIST(V) \ + V(F32x4Abs) \ + V(F32x4Neg) \ V(F32x4Sqrt) \ V(F32x4SConvertI32x4) \ V(F32x4RecipApprox) \ @@ -2303,11 +2305,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { V(I16x8Abs) \ V(I8x16Neg) \ V(I8x16Abs) \ - V(I8x16BitMask) - -#define SIMD_UNOP_PREFIX_LIST(V) \ - V(F32x4Abs) \ - V(F32x4Neg) \ + V(I8x16BitMask) \ V(S128Not) #define SIMD_ANYTRUE_LIST(V) \ @@ -2612,25 +2610,6 @@ SIMD_UNOP_LIST(VISIT_SIMD_UNOP) #undef VISIT_SIMD_UNOP #undef SIMD_UNOP_LIST -// TODO(v8:9198): SSE instructions that read 16 bytes from memory require the -// operand to be 16-byte aligned. AVX instructions relax this requirement, but -// might have reduced performance if the memory crosses cache line. But since we -// have limited xmm registers, this might be okay to alleviate register -// pressure. -#define VISIT_SIMD_UNOP_PREFIX(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - IA32OperandGenerator g(this); \ - if (IsSupported(AVX)) { \ - Emit(kAVX##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0))); \ - } else { \ - Emit(kSSE##Opcode, g.DefineSameAsFirst(node), \ - g.UseRegister(node->InputAt(0))); \ - } \ - } -SIMD_UNOP_PREFIX_LIST(VISIT_SIMD_UNOP_PREFIX) -#undef VISIT_SIMD_UNOP_PREFIX -#undef SIMD_UNOP_PREFIX_LIST - // The implementation of AnyTrue is the same for all shapes. #define VISIT_SIMD_ANYTRUE(Opcode) \ void InstructionSelector::Visit##Opcode(Node* node) { \