diff --git a/src/codegen/ia32/macro-assembler-ia32.cc b/src/codegen/ia32/macro-assembler-ia32.cc
index c6fa03f2f8..36a5a6888d 100644
--- a/src/codegen/ia32/macro-assembler-ia32.cc
+++ b/src/codegen/ia32/macro-assembler-ia32.cc
@@ -720,6 +720,24 @@ void TurboAssembler::I16x8ExtMul(XMMRegister dst, XMMRegister src1,
   }
 }
 
+void TurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
+                                XMMRegister src1, XMMRegister src2,
+                                XMMRegister scratch) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vpandn(scratch, mask, src2);
+    vpand(dst, src1, mask);
+    vpor(dst, dst, scratch);
+  } else {
+    DCHECK_EQ(dst, mask);
+    // Use float ops as they are 1 byte shorter than int ops.
+    movaps(scratch, dst);
+    andnps(scratch, src2);
+    andps(dst, src1);
+    orps(dst, scratch);
+  }
+}
+
 void TurboAssembler::ShlPair(Register high, Register low, uint8_t shift) {
   DCHECK_GE(63, shift);
   if (shift >= 32) {
diff --git a/src/codegen/ia32/macro-assembler-ia32.h b/src/codegen/ia32/macro-assembler-ia32.h
index f9e6cb77d5..3bfad51a48 100644
--- a/src/codegen/ia32/macro-assembler-ia32.h
+++ b/src/codegen/ia32/macro-assembler-ia32.h
@@ -621,6 +621,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
                    XMMRegister scratch, bool low, bool is_signed);
   void I16x8ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                    XMMRegister scratch, bool low, bool is_signed);
+  // Requires dst == mask when AVX is not supported.
+  void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
+                  XMMRegister src2, XMMRegister scratch);
 
   void Push(Register src) { push(src); }
   void Push(Operand src) { push(src); }
diff --git a/src/compiler/backend/ia32/code-generator-ia32.cc b/src/compiler/backend/ia32/code-generator-ia32.cc
index cde04a9463..4248a3f185 100644
--- a/src/compiler/backend/ia32/code-generator-ia32.cc
+++ b/src/compiler/backend/ia32/code-generator-ia32.cc
@@ -6,6 +6,7 @@
 #include "src/codegen/assembler-inl.h"
 #include "src/codegen/callable.h"
 #include "src/codegen/ia32/assembler-ia32.h"
+#include "src/codegen/ia32/register-ia32.h"
 #include "src/codegen/macro-assembler.h"
 #include "src/codegen/optimized-compilation-info.h"
 #include "src/compiler/backend/code-generator-impl.h"
@@ -3835,24 +3836,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                 i.InputOperand(1));
       break;
     }
-    case kSSES128Select: {
-      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
-      // Mask used here is stored in dst.
-      XMMRegister dst = i.OutputSimd128Register();
-      // Use float ops as they are 1 byte shorter than int ops.
-      __ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
-      __ andnps(kScratchDoubleReg, i.InputSimd128Register(2));
-      __ andps(dst, i.InputSimd128Register(1));
-      __ orps(dst, kScratchDoubleReg);
-      break;
-    }
-    case kAVXS128Select: {
-      CpuFeatureScope avx_scope(tasm(), AVX);
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister mask = i.InputSimd128Register(0);
-      __ vpandn(kScratchDoubleReg, mask, i.InputSimd128Register(2));
-      __ vpand(dst, i.InputSimd128Register(1), mask);
-      __ vpor(dst, dst, kScratchDoubleReg);
+    case kIA32S128Select: {
+      __ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                    i.InputSimd128Register(1), i.InputSimd128Register(2),
+                    kScratchDoubleReg);
       break;
     }
     case kIA32S128AndNot: {
diff --git a/src/compiler/backend/ia32/instruction-codes-ia32.h b/src/compiler/backend/ia32/instruction-codes-ia32.h
index e5d9cd1d83..5a03a3b620 100644
--- a/src/compiler/backend/ia32/instruction-codes-ia32.h
+++ b/src/compiler/backend/ia32/instruction-codes-ia32.h
@@ -362,8 +362,7 @@ namespace compiler {
   V(AVXS128Or)               \
   V(SSES128Xor)              \
   V(AVXS128Xor)              \
-  V(SSES128Select)           \
-  V(AVXS128Select)           \
+  V(IA32S128Select)          \
   V(IA32S128AndNot)          \
   V(IA32I8x16Swizzle)        \
   V(IA32I8x16Shuffle)        \
diff --git a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
index f8d07919df..63ee4697fb 100644
--- a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
+++ b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
@@ -344,8 +344,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kAVXS128Or:
     case kSSES128Xor:
     case kAVXS128Xor:
-    case kSSES128Select:
-    case kAVXS128Select:
+    case kIA32S128Select:
    case kIA32S128AndNot:
    case kIA32I8x16Swizzle:
    case kIA32I8x16Shuffle:
diff --git a/src/compiler/backend/ia32/instruction-selector-ia32.cc b/src/compiler/backend/ia32/instruction-selector-ia32.cc
index f80533363d..990c04ea05 100644
--- a/src/compiler/backend/ia32/instruction-selector-ia32.cc
+++ b/src/compiler/backend/ia32/instruction-selector-ia32.cc
@@ -2476,16 +2476,10 @@ void InstructionSelector::VisitS128Zero(Node* node) {
 
 void InstructionSelector::VisitS128Select(Node* node) {
   IA32OperandGenerator g(this);
-  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
-  InstructionOperand operand1 = g.UseRegister(node->InputAt(1));
-  InstructionOperand operand2 = g.UseRegister(node->InputAt(2));
-  if (IsSupported(AVX)) {
-    Emit(kAVXS128Select, g.DefineAsRegister(node), operand0, operand1,
-         operand2);
-  } else {
-    Emit(kSSES128Select, g.DefineSameAsFirst(node), operand0, operand1,
-         operand2);
-  }
+  InstructionOperand dst =
+      IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
+  Emit(kIA32S128Select, dst, g.UseRegister(node->InputAt(0)),
+       g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(2)));
 }
 
 void InstructionSelector::VisitS128AndNot(Node* node) {
diff --git a/src/wasm/baseline/ia32/liftoff-assembler-ia32.h b/src/wasm/baseline/ia32/liftoff-assembler-ia32.h
index dabf04e56a..6434269340 100644
--- a/src/wasm/baseline/ia32/liftoff-assembler-ia32.h
+++ b/src/wasm/baseline/ia32/liftoff-assembler-ia32.h
@@ -3147,17 +3147,16 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                         LiftoffRegister src1,
                                         LiftoffRegister src2,
                                         LiftoffRegister mask) {
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope scope(this, AVX);
-    vxorps(liftoff::kScratchDoubleReg, src1.fp(), src2.fp());
-    vandps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, mask.fp());
-    vxorps(dst.fp(), liftoff::kScratchDoubleReg, src2.fp());
+  // Ensure that we don't overwrite any inputs with the movdqu below.
+  DCHECK_NE(dst, src1);
+  DCHECK_NE(dst, src2);
+  if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
+    movdqu(dst.fp(), mask.fp());
+    S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp(),
+               liftoff::kScratchDoubleReg);
   } else {
-    movaps(liftoff::kScratchDoubleReg, src1.fp());
-    xorps(liftoff::kScratchDoubleReg, src2.fp());
-    andps(liftoff::kScratchDoubleReg, mask.fp());
-    if (dst.fp() != src2.fp()) movaps(dst.fp(), src2.fp());
-    xorps(dst.fp(), liftoff::kScratchDoubleReg);
+    S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp(),
+               liftoff::kScratchDoubleReg);
   }
 }
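
Note (not part of the patch): the helper this change introduces computes the standard 128-bit bitselect, dst = (mask & src1) | (~mask & src2) — each destination bit comes from src1 where the corresponding mask bit is set and from src2 otherwise. The SSE path (movaps/andnps/andps/orps), the AVX path (vpandn/vpand/vpor), and the xor formulation removed from Liftoff, ((src1 ^ src2) & mask) ^ src2, all compute the same function. Below is a minimal scalar sketch of that semantics; the type and function names are illustrative only and do not exist in V8.

// Scalar model of the bitselect performed by TurboAssembler::S128Select,
// treating one 128-bit lane as two uint64_t halves. Names are hypothetical.
#include <cassert>
#include <cstdint>

struct Lanes128 {
  uint64_t lo;
  uint64_t hi;
};

// dst = (mask & src1) | (~mask & src2): the and/andn/or form used by the
// macro-assembler helper (andps/andnps/orps, or vpand/vpandn/vpor on AVX).
Lanes128 SelectAndOr(Lanes128 mask, Lanes128 src1, Lanes128 src2) {
  return {(mask.lo & src1.lo) | (~mask.lo & src2.lo),
          (mask.hi & src1.hi) | (~mask.hi & src2.hi)};
}

// dst = ((src1 ^ src2) & mask) ^ src2: the xor form the old Liftoff code
// used. Where a mask bit is set this yields (src1 ^ src2) ^ src2 == src1;
// where it is clear it yields 0 ^ src2 == src2, so both forms agree.
Lanes128 SelectXor(Lanes128 mask, Lanes128 src1, Lanes128 src2) {
  return {((src1.lo ^ src2.lo) & mask.lo) ^ src2.lo,
          ((src1.hi ^ src2.hi) & mask.hi) ^ src2.hi};
}

int main() {
  Lanes128 mask = {0x00FF00FF00FF00FFull, 0xFFFF0000FFFF0000ull};
  Lanes128 a = {0x1111111111111111ull, 0x2222222222222222ull};
  Lanes128 b = {0x8888888888888888ull, 0x9999999999999999ull};
  Lanes128 r1 = SelectAndOr(mask, a, b);
  Lanes128 r2 = SelectXor(mask, a, b);
  assert(r1.lo == r2.lo && r1.hi == r2.hi);  // both forms select identically
  return 0;
}

Consolidating both tiers on the and/andn/or form lets TurboFan and Liftoff share one macro-assembler helper; the cost is one extra movdqu in Liftoff on non-AVX hardware when dst != mask, since the SSE sequence requires the mask in dst.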