diff --git a/src/codegen/ia32/assembler-ia32.cc b/src/codegen/ia32/assembler-ia32.cc
index ba9e0d521c..931297be53 100644
--- a/src/codegen/ia32/assembler-ia32.cc
+++ b/src/codegen/ia32/assembler-ia32.cc
@@ -2973,6 +2973,26 @@ void Assembler::vpshufd(XMMRegister dst, Operand src, uint8_t shuffle) {
   EMIT(shuffle);
 }
 
+// AVX variable blends: the mask register is emitted separately from the other
+// operands, in the upper four bits of a trailing byte.
+void Assembler::vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
+                          XMMRegister mask) {
+  vinstr(0x4A, dst, src1, src2, k66, k0F3A, kW0);
+  EMIT(mask.code() << 4);
+}
+
+void Assembler::vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
+                          XMMRegister mask) {
+  vinstr(0x4B, dst, src1, src2, k66, k0F3A, kW0);
+  EMIT(mask.code() << 4);
+}
+
+void Assembler::vpblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
+                          XMMRegister mask) {
+  vinstr(0x4C, dst, src1, src2, k66, k0F3A, kW0);
+  EMIT(mask.code() << 4);
+}
+
 void Assembler::vpblendw(XMMRegister dst, XMMRegister src1, Operand src2,
                          uint8_t mask) {
   vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
diff --git a/src/codegen/ia32/assembler-ia32.h b/src/codegen/ia32/assembler-ia32.h
index 4f71e09ee0..21db98199d 100644
--- a/src/codegen/ia32/assembler-ia32.h
+++ b/src/codegen/ia32/assembler-ia32.h
@@ -1406,6 +1406,13 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   }
   void vpshufd(XMMRegister dst, Operand src, uint8_t shuffle);
 
+  void vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
+                 XMMRegister mask);
+  void vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
+                 XMMRegister mask);
+  void vpblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
+                 XMMRegister mask);
+
   void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 uint8_t mask) {
     vpblendw(dst, src1, Operand(src2), mask);
@@ -1692,6 +1699,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
 
   SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
   SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
+  DECLARE_SSE4_INSTRUCTION(blendvps, 66, 0F, 38, 14)
+  DECLARE_SSE4_INSTRUCTION(blendvpd, 66, 0F, 38, 15)
+  DECLARE_SSE4_INSTRUCTION(pblendvb, 66, 0F, 38, 10)
 #undef DECLARE_SSE4_INSTRUCTION
 
 #define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
diff --git a/src/codegen/ia32/macro-assembler-ia32.cc b/src/codegen/ia32/macro-assembler-ia32.cc
index 072b7f29c6..2ee7edea81 100644
--- a/src/codegen/ia32/macro-assembler-ia32.cc
+++ b/src/codegen/ia32/macro-assembler-ia32.cc
@@ -1635,6 +1635,47 @@ void TurboAssembler::Pshufb(XMMRegister dst, XMMRegister src, Operand mask) {
   FATAL("no AVX or SSE3 support");
 }
 
+// Variable blends. The SSE4.1 fallback only encodes dst and src2, so it
+// requires dst == src1 and mask == xmm0 (DCHECKed); AVX has no such limits.
+void TurboAssembler::Blendvps(XMMRegister dst, XMMRegister src1,
+                              XMMRegister src2, XMMRegister mask) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope avx_scope(this, AVX);
+    vblendvps(dst, src1, src2, mask);
+  } else {
+    CpuFeatureScope scope(this, SSE4_1);
+    DCHECK_EQ(dst, src1);
+    DCHECK_EQ(xmm0, mask);
+    blendvps(dst, src2);
+  }
+}
+
+void TurboAssembler::Blendvpd(XMMRegister dst, XMMRegister src1,
+                              XMMRegister src2, XMMRegister mask) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope avx_scope(this, AVX);
+    vblendvpd(dst, src1, src2, mask);
+  } else {
+    CpuFeatureScope scope(this, SSE4_1);
+    DCHECK_EQ(dst, src1);
+    DCHECK_EQ(xmm0, mask);
+    blendvpd(dst, src2);
+  }
+}
+
+void TurboAssembler::Pblendvb(XMMRegister dst, XMMRegister src1,
+                              XMMRegister src2, XMMRegister mask) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope avx_scope(this, AVX);
+    vpblendvb(dst, src1, src2, mask);
+  } else {
+    CpuFeatureScope scope(this, SSE4_1);
+    DCHECK_EQ(dst, src1);
+    DCHECK_EQ(xmm0, mask);
+    pblendvb(dst, src2);
+  }
+}
+
 void TurboAssembler::Pblendw(XMMRegister dst, Operand src, uint8_t imm8) {
   if (CpuFeatures::IsSupported(AVX)) {
     CpuFeatureScope scope(this, AVX);
diff --git a/src/codegen/ia32/macro-assembler-ia32.h b/src/codegen/ia32/macro-assembler-ia32.h
index e7d0410d5c..e7da2f60db 100644
--- a/src/codegen/ia32/macro-assembler-ia32.h
+++ b/src/codegen/ia32/macro-assembler-ia32.h
@@ -505,6 +505,14 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
     Pshufb(dst, src, Operand(mask));
   }
   void Pshufb(XMMRegister dst, XMMRegister src, Operand mask);
+
+  void Blendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
+                XMMRegister mask);
+  void Blendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
+                XMMRegister mask);
+  void Pblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
+                XMMRegister mask);
+
   void Pblendw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
     Pblendw(dst, Operand(src), imm8);
   }
diff --git a/src/compiler/backend/ia32/code-generator-ia32.cc b/src/compiler/backend/ia32/code-generator-ia32.cc
index cdc9bc5f2c..83a5c34f82 100644
--- a/src/compiler/backend/ia32/code-generator-ia32.cc
+++ b/src/compiler/backend/ia32/code-generator-ia32.cc
@@ -2158,6 +2158,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                  i.InputSimd128Register(1));
       break;
     }
+    case kIA32I8x16SignSelect: {
+      __ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1), i.InputSimd128Register(2));
+      break;
+    }
+    case kIA32I16x8SignSelect: {
+      if (CpuFeatures::IsSupported(AVX)) {
+        CpuFeatureScope avx_scope(tasm(), AVX);
+        __ vpsraw(kScratchDoubleReg, i.InputSimd128Register(2), 15);
+        __ vpblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                     i.InputSimd128Register(1), kScratchDoubleReg);
+      } else {
+        DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+        XMMRegister mask = i.InputSimd128Register(2);
+        DCHECK_EQ(xmm0, mask);
+        __ movapd(kScratchDoubleReg, mask);
+        __ pxor(mask, mask);
+        __ pcmpgtw(mask, kScratchDoubleReg);
+        __ pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+        // Restore mask.
+        __ movapd(mask, kScratchDoubleReg);
+      }
+      break;
+    }
+    case kIA32I32x4SignSelect: {
+      __ Blendvps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1), i.InputSimd128Register(2));
+      break;
+    }
+    case kIA32I64x2SignSelect: {
+      __ Blendvpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1), i.InputSimd128Register(2));
+      break;
+    }
     case kSSEF32x4Splat: {
       DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       XMMRegister dst = i.OutputSimd128Register();
diff --git a/src/compiler/backend/ia32/instruction-codes-ia32.h b/src/compiler/backend/ia32/instruction-codes-ia32.h
index 96b3ab80cc..b72cdbd08f 100644
--- a/src/compiler/backend/ia32/instruction-codes-ia32.h
+++ b/src/compiler/backend/ia32/instruction-codes-ia32.h
@@ -150,6 +150,7 @@ namespace compiler {
   V(IA32I64x2ShrU)                 \
   V(IA32I64x2BitMask)              \
   V(IA32I64x2Eq)                   \
+  V(IA32I64x2SignSelect)           \
   V(SSEF32x4Splat)                 \
   V(AVXF32x4Splat)                 \
   V(SSEF32x4ExtractLane)           \
@@ -236,6 +237,7 @@ namespace compiler {
   V(IA32I32x4Abs)                  \
   V(IA32I32x4BitMask)              \
   V(IA32I32x4DotI16x8S)            \
+  V(IA32I32x4SignSelect)           \
   V(IA32I16x8Splat)                \
   V(IA32I16x8ExtractLaneS)         \
   V(IA32I16x8SConvertI8x16Low)     \
@@ -289,6 +291,7 @@ namespace compiler {
   V(IA32I16x8RoundingAverageU)     \
   V(IA32I16x8Abs)                  \
   V(IA32I16x8BitMask)              \
+  V(IA32I16x8SignSelect)           \
   V(IA32I8x16Splat)                \
   V(IA32I8x16ExtractLaneS)         \
   V(IA32Pinsrb)                    \
@@ -342,6 +345,7 @@ namespace compiler {
   V(IA32I8x16RoundingAverageU)     \
   V(IA32I8x16Abs)                  \
   V(IA32I8x16BitMask)              \
+  V(IA32I8x16SignSelect)           \
   V(IA32S128Const)                 \
   V(IA32S128Zero)                  \
   V(IA32S128AllOnes)               \
diff --git a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
index 6de3b792a0..8a555f3825 100644
--- a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
+++ b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
@@ -129,6 +129,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32I64x2ShrU:
     case kIA32I64x2BitMask:
     case kIA32I64x2Eq:
+    case kIA32I64x2SignSelect:
     case kSSEF32x4Splat:
     case kAVXF32x4Splat:
     case kSSEF32x4ExtractLane:
@@ -215,6 +216,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32I32x4Abs:
     case kIA32I32x4BitMask:
     case kIA32I32x4DotI16x8S:
+    case kIA32I32x4SignSelect:
     case kIA32I16x8Splat:
     case kIA32I16x8ExtractLaneS:
     case kIA32I16x8SConvertI8x16Low:
@@ -268,6 +270,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32I16x8RoundingAverageU:
     case kIA32I16x8Abs:
     case kIA32I16x8BitMask:
+    case kIA32I16x8SignSelect:
     case kIA32I8x16Splat:
     case kIA32I8x16ExtractLaneS:
     case kIA32Pinsrb:
@@ -321,6 +324,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32I8x16RoundingAverageU:
     case kIA32I8x16Abs:
     case kIA32I8x16BitMask:
+    case kIA32I8x16SignSelect:
     case kIA32S128Const:
     case kIA32S128Zero:
     case kIA32S128AllOnes:
diff --git a/src/compiler/backend/ia32/instruction-selector-ia32.cc b/src/compiler/backend/ia32/instruction-selector-ia32.cc
index 11f85ec9b3..ca8b074e9e 100644
--- a/src/compiler/backend/ia32/instruction-selector-ia32.cc
+++ b/src/compiler/backend/ia32/instruction-selector-ia32.cc
@@ -2993,6 +2993,40 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) {
   VisitPminOrPmax(this, node, kIA32F64x2Pmax);
 }
 
+namespace {
+void VisitSignSelect(InstructionSelector* selector, Node* node,
+                     ArchOpcode opcode) {
+  IA32OperandGenerator g(selector);
+  // signselect(x, y, -1) = x
+  // pblendvb(dst, x, y, -1) = dst <- y, so we need to swap x and y.
+  if (selector->IsSupported(AVX)) {
+    selector->Emit(
+        opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(1)),
+        g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(2)));
+  } else {
+    selector->Emit(
+        opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(1)),
+        g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(2), xmm0));
+  }
+}
+}  // namespace
+
+void InstructionSelector::VisitI8x16SignSelect(Node* node) {
+  VisitSignSelect(this, node, kIA32I8x16SignSelect);
+}
+
+void InstructionSelector::VisitI16x8SignSelect(Node* node) {
+  VisitSignSelect(this, node, kIA32I16x8SignSelect);
+}
+
+void InstructionSelector::VisitI32x4SignSelect(Node* node) {
+  VisitSignSelect(this, node, kIA32I32x4SignSelect);
+}
+
+void InstructionSelector::VisitI64x2SignSelect(Node* node) {
+  VisitSignSelect(this, node, kIA32I64x2SignSelect);
+}
+
 // static
 MachineOperatorBuilder::Flags
 InstructionSelector::SupportedMachineOperatorFlags() {
diff --git a/src/compiler/backend/instruction-selector.cc b/src/compiler/backend/instruction-selector.cc
index 045e9d5245..dc30d7c7df 100644
--- a/src/compiler/backend/instruction-selector.cc
+++ b/src/compiler/backend/instruction-selector.cc
@@ -2826,13 +2826,13 @@ void InstructionSelector::VisitLoadLane(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitStoreLane(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
 
-#if !V8_TARGET_ARCH_X64
+#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
 // TODO(v8:10983) Prototyping sign select.
 void InstructionSelector::VisitI8x16SignSelect(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI16x8SignSelect(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI32x4SignSelect(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI64x2SignSelect(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_X64
+#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
 
 #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && \
     !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_MIPS
diff --git a/src/diagnostics/ia32/disasm-ia32.cc b/src/diagnostics/ia32/disasm-ia32.cc
index 32706cfd97..d7b57f9ad2 100644
--- a/src/diagnostics/ia32/disasm-ia32.cc
+++ b/src/diagnostics/ia32/disasm-ia32.cc
@@ -856,6 +856,29 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
         AppendToBuffer(",%d", Imm8(current));
         current++;
         break;
+      // vblendv*: the mask register is in bits 7:4 of the byte after the
+      // operand; step over that byte, as the imm8 case above does.
+      case 0x4A:
+        AppendToBuffer("vblendvps %s,%s,", NameOfXMMRegister(regop),
+                       NameOfXMMRegister(vvvv));
+        current += PrintRightXMMOperand(current);
+        AppendToBuffer(",%s", NameOfXMMRegister(*current >> 4));
+        current++;
+        break;
+      case 0x4B:
+        AppendToBuffer("vblendvpd %s,%s,", NameOfXMMRegister(regop),
+                       NameOfXMMRegister(vvvv));
+        current += PrintRightXMMOperand(current);
+        AppendToBuffer(",%s", NameOfXMMRegister(*current >> 4));
+        current++;
+        break;
+      case 0x4C:
+        AppendToBuffer("vpblendvb %s,%s,", NameOfXMMRegister(regop),
+                       NameOfXMMRegister(vvvv));
+        current += PrintRightXMMOperand(current);
+        AppendToBuffer(",%s", NameOfXMMRegister(*current >> 4));
+        current++;
+        break;
       default:
         UnimplementedInstruction();
     }
@@ -2189,6 +2212,21 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector out_buffer,
               SSE4_INSTRUCTION_LIST(SSE34_DIS_CASE)
               SSE4_RM_INSTRUCTION_LIST(SSE34_DIS_CASE)
 #undef SSE34_DIS_CASE
+              case 0x10:
+                AppendToBuffer("pblendvb %s,", NameOfXMMRegister(regop));
+                data += PrintRightXMMOperand(data);
+                AppendToBuffer(",xmm0");
+                break;
+              case 0x14:
+                AppendToBuffer("blendvps %s,", NameOfXMMRegister(regop));
+                data += PrintRightXMMOperand(data);
+                AppendToBuffer(",xmm0");
+                break;
+              case 0x15:
+                AppendToBuffer("blendvpd %s,", NameOfXMMRegister(regop));
+                data += PrintRightXMMOperand(data);
+                AppendToBuffer(",xmm0");
+                break;
               default:
                 UnimplementedInstruction();
             }
diff --git a/test/cctest/test-disasm-ia32.cc b/test/cctest/test-disasm-ia32.cc
index da0b0c83ff..f40e2e5ea0 100644
--- a/test/cctest/test-disasm-ia32.cc
+++ b/test/cctest/test-disasm-ia32.cc
@@ -627,6 +627,10 @@ TEST(DisasmIa320) {
       __ pinsrd(xmm1, Operand(edx, 4), 0);
       __ extractps(eax, xmm1, 0);
 
+      __ blendvps(xmm3, xmm1);
+      __ blendvpd(xmm3, xmm1);
+      __ pblendvb(xmm3, xmm1);
+
       SSE4_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
       SSE4_RM_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
     }
@@ -786,6 +790,10 @@ TEST(DisasmIa320) {
       __ vpinsrd(xmm0, xmm1, eax, 0);
       __ vpinsrd(xmm0, xmm1, Operand(edx, 4), 0);
 
+      __ vblendvps(xmm3, xmm1, xmm4, xmm6);
+      __ vblendvpd(xmm3, xmm1, xmm4, xmm6);
+      __ vpblendvb(xmm3, xmm1, xmm4, xmm6);
+
       __ vcvtdq2ps(xmm1, xmm0);
       __ vcvtdq2ps(xmm1, Operand(ebx, ecx, times_4, 10000));
       __ vcvttps2dq(xmm1, xmm0);
diff --git a/test/cctest/wasm/test-run-wasm-simd.cc b/test/cctest/wasm/test-run-wasm-simd.cc
index dec12f2d89..2752237554 100644
--- a/test/cctest/wasm/test-run-wasm-simd.cc
+++ b/test/cctest/wasm/test-run-wasm-simd.cc
@@ -765,7 +765,7 @@ WASM_SIMD_TEST(F32x4Le) {
   RunF32x4CompareOpTest(execution_tier, lower_simd, kExprF32x4Le, LessEqual);
 }
 
-#if V8_TARGET_ARCH_X64
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
 // TODO(v8:10983) Prototyping sign select.
 template
 void RunSignSelect(TestExecutionTier execution_tier, LowerSimd lower_simd,
@@ -822,7 +822,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2SignSelect) {
   RunSignSelect(execution_tier, lower_simd, kExprI64x2SignSelect,
                 kExprI64x2Splat, mask);
 }
-#endif  // V8_TARGET_ARCH_X64
+#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
 
 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X
 WASM_SIMD_TEST_NO_LOWERING(F32x4Qfma) {