From a16add806dff503f71027dc4d7ff436ec0cf5747 Mon Sep 17 00:00:00 2001 From: Ng Zhi An Date: Tue, 9 Feb 2021 13:58:04 -0800 Subject: [PATCH] [wasm-simd][ia32] Implement i64x2 signed compares The code sequence is exactly the same as x64. Bug: v8:11415 Change-Id: I53ed2723eda29c0a250cff514372a3d45b203476 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2683495 Reviewed-by: Bill Budge Commit-Queue: Zhi An Ng Cr-Commit-Position: refs/heads/master@{#72637} --- src/codegen/ia32/assembler-ia32.cc | 22 +++++++ src/codegen/ia32/assembler-ia32.h | 5 ++ .../backend/ia32/code-generator-ia32.cc | 63 +++++++++++++++++++ .../backend/ia32/instruction-codes-ia32.h | 2 + .../ia32/instruction-scheduler-ia32.cc | 2 + .../backend/ia32/instruction-selector-ia32.cc | 31 +++++++++ src/compiler/backend/instruction-selector.cc | 4 +- test/cctest/test-disasm-ia32.cc | 10 +++ test/cctest/wasm/test-run-wasm-simd.cc | 4 +- 9 files changed, 139 insertions(+), 4 deletions(-) diff --git a/src/codegen/ia32/assembler-ia32.cc b/src/codegen/ia32/assembler-ia32.cc index 67e4cf97e3..739069fee8 100644 --- a/src/codegen/ia32/assembler-ia32.cc +++ b/src/codegen/ia32/assembler-ia32.cc @@ -2516,6 +2516,14 @@ void Assembler::movdqa(XMMRegister dst, Operand src) { emit_sse_operand(dst, src); } +void Assembler::movdqa(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x6F); + emit_sse_operand(dst, src); +} + void Assembler::movdqu(Operand dst, XMMRegister src) { EnsureSpace ensure_space(this); EMIT(0xF3); @@ -2622,6 +2630,16 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) { EMIT(imm8); } +void Assembler::pcmpgtq(XMMRegister dst, XMMRegister src) { + DCHECK(IsEnabled(SSE4_2)); + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x38); + EMIT(0x37); + emit_sse_operand(dst, src); +} + void Assembler::psllw(XMMRegister reg, uint8_t shift) { EnsureSpace ensure_space(this); EMIT(0x66); @@ -3150,6 +3168,10 @@ 
void Assembler::vextractps(Operand dst, XMMRegister src, byte imm8) { EMIT(imm8); } +void Assembler::vpcmpgtq(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + vinstr(0x37, dst, src1, src2, k66, k0F38, VexW::kWIG); +} + void Assembler::bmi1(byte op, Register reg, Register vreg, Operand rm) { DCHECK(IsEnabled(BMI1)); EnsureSpace ensure_space(this); diff --git a/src/codegen/ia32/assembler-ia32.h b/src/codegen/ia32/assembler-ia32.h index d97b02bea9..3914c35544 100644 --- a/src/codegen/ia32/assembler-ia32.h +++ b/src/codegen/ia32/assembler-ia32.h @@ -990,6 +990,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { void movdqa(XMMRegister dst, Operand src); void movdqa(Operand dst, XMMRegister src); + void movdqa(XMMRegister dst, XMMRegister src); void movdqu(XMMRegister dst, Operand src); void movdqu(Operand dst, XMMRegister src); void movdqu(XMMRegister dst, XMMRegister src); @@ -1016,6 +1017,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { void extractps(Operand dst, XMMRegister src, byte imm8); void extractps(Register dst, XMMRegister src, byte imm8); + void pcmpgtq(XMMRegister dst, XMMRegister src); + void psllw(XMMRegister reg, uint8_t shift); void pslld(XMMRegister reg, uint8_t shift); void psrlw(XMMRegister reg, uint8_t shift); @@ -1369,6 +1372,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { void vextractps(Operand dst, XMMRegister src, byte imm8); + void vpcmpgtq(XMMRegister dst, XMMRegister src1, XMMRegister src2); + void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); } void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); } void vmovapd(XMMRegister dst, XMMRegister src) { vmovapd(dst, Operand(src)); } diff --git a/src/compiler/backend/ia32/code-generator-ia32.cc b/src/compiler/backend/ia32/code-generator-ia32.cc index 16d63cfc3b..7e176e05d0 100644 --- a/src/compiler/backend/ia32/code-generator-ia32.cc +++ b/src/compiler/backend/ia32/code-generator-ia32.cc @@ -2319,6 
+2319,69 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg); break; } + case kIA32I64x2GtS: { + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src0 = i.InputSimd128Register(0); + XMMRegister src1 = i.InputSimd128Register(1); + + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope avx_scope(tasm(), AVX); + __ vpcmpgtq(dst, src0, src1); + } else if (CpuFeatures::IsSupported(SSE4_2)) { + CpuFeatureScope sse_scope(tasm(), SSE4_2); + DCHECK_EQ(dst, src0); + __ pcmpgtq(dst, src1); + } else { + DCHECK_NE(dst, src0); + DCHECK_NE(dst, src1); + __ movdqa(dst, src1); + __ movdqa(kScratchDoubleReg, src0); + __ psubq(dst, src0); + __ pcmpeqd(kScratchDoubleReg, src1); + __ pand(dst, kScratchDoubleReg); + __ movdqa(kScratchDoubleReg, src0); + __ pcmpgtd(kScratchDoubleReg, src1); + __ por(dst, kScratchDoubleReg); + __ pshufd(dst, dst, 0xF5); + } + break; + } + case kIA32I64x2GeS: { + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src0 = i.InputSimd128Register(0); + XMMRegister src1 = i.InputSimd128Register(1); + + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope avx_scope(tasm(), AVX); + __ vpcmpgtq(dst, src1, src0); + __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); + __ vpxor(dst, dst, kScratchDoubleReg); + } else if (CpuFeatures::IsSupported(SSE4_2)) { + CpuFeatureScope sse_scope(tasm(), SSE4_2); + DCHECK_NE(dst, src0); + if (dst != src1) { + __ movdqa(dst, src1); + } + __ pcmpgtq(dst, src0); + __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); + __ pxor(dst, kScratchDoubleReg); + } else { + DCHECK_NE(dst, src0); + DCHECK_NE(dst, src1); + __ movdqa(dst, src0); + __ movdqa(kScratchDoubleReg, src1); + __ psubq(dst, src1); + __ pcmpeqd(kScratchDoubleReg, src0); + __ pand(dst, kScratchDoubleReg); + __ movdqa(kScratchDoubleReg, src1); + __ pcmpgtd(kScratchDoubleReg, src0); + __ por(dst, kScratchDoubleReg); + __ pshufd(dst, dst, 0xF5); + __ 
pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); + __ pxor(dst, kScratchDoubleReg); + } + break; + } case kIA32I64x2SConvertI32x4Low: { __ Pmovsxdq(i.OutputSimd128Register(), i.InputSimd128Register(0)); break; diff --git a/src/compiler/backend/ia32/instruction-codes-ia32.h b/src/compiler/backend/ia32/instruction-codes-ia32.h index 24da940afa..1b97c2572b 100644 --- a/src/compiler/backend/ia32/instruction-codes-ia32.h +++ b/src/compiler/backend/ia32/instruction-codes-ia32.h @@ -150,6 +150,8 @@ namespace compiler { V(IA32I64x2BitMask) \ V(IA32I64x2Eq) \ V(IA32I64x2Ne) \ + V(IA32I64x2GtS) \ + V(IA32I64x2GeS) \ V(IA32I64x2SignSelect) \ V(IA32I64x2ExtMulLowI32x4S) \ V(IA32I64x2ExtMulHighI32x4S) \ diff --git a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc index c00499829e..839c7610d4 100644 --- a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc +++ b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc @@ -135,6 +135,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kIA32I64x2BitMask: case kIA32I64x2Eq: case kIA32I64x2Ne: + case kIA32I64x2GtS: + case kIA32I64x2GeS: case kIA32I64x2SignSelect: case kIA32I64x2ExtMulLowI32x4S: case kIA32I64x2ExtMulHighI32x4S: diff --git a/src/compiler/backend/ia32/instruction-selector-ia32.cc b/src/compiler/backend/ia32/instruction-selector-ia32.cc index d8753f6b6f..a24f67e70b 100644 --- a/src/compiler/backend/ia32/instruction-selector-ia32.cc +++ b/src/compiler/backend/ia32/instruction-selector-ia32.cc @@ -3170,6 +3170,37 @@ void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) { arraysize(temps), temps); } +void InstructionSelector::VisitI64x2GtS(Node* node) { + IA32OperandGenerator g(this); + if (CpuFeatures::IsSupported(AVX)) { + Emit(kIA32I64x2GtS, g.DefineAsRegister(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); + } else if (CpuFeatures::IsSupported(SSE4_2)) { + Emit(kIA32I64x2GtS, g.DefineSameAsFirst(node), 
+ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); + } else { + Emit(kIA32I64x2GtS, g.DefineAsRegister(node), + g.UseUniqueRegister(node->InputAt(0)), + g.UseUniqueRegister(node->InputAt(1))); + } +} + +void InstructionSelector::VisitI64x2GeS(Node* node) { + IA32OperandGenerator g(this); + if (CpuFeatures::IsSupported(AVX)) { + Emit(kIA32I64x2GeS, g.DefineAsRegister(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); + } else if (CpuFeatures::IsSupported(SSE4_2)) { + Emit(kIA32I64x2GeS, g.DefineAsRegister(node), + g.UseUniqueRegister(node->InputAt(0)), + g.UseRegister(node->InputAt(1))); + } else { + Emit(kIA32I64x2GeS, g.DefineAsRegister(node), + g.UseUniqueRegister(node->InputAt(0)), + g.UseUniqueRegister(node->InputAt(1))); + } +} + // static MachineOperatorBuilder::Flags InstructionSelector::SupportedMachineOperatorFlags() { diff --git a/src/compiler/backend/instruction-selector.cc b/src/compiler/backend/instruction-selector.cc index 7d24a1dac1..549b78280e 100644 --- a/src/compiler/backend/instruction-selector.cc +++ b/src/compiler/backend/instruction-selector.cc @@ -2799,10 +2799,10 @@ void InstructionSelector::VisitI32x4WidenI8x16S(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI32x4WidenI8x16U(Node* node) { UNIMPLEMENTED(); } #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 -#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 +#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32 void InstructionSelector::VisitI64x2GtS(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2GeS(Node* node) { UNIMPLEMENTED(); } -#endif //! 
V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32 void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } diff --git a/test/cctest/test-disasm-ia32.cc b/test/cctest/test-disasm-ia32.cc index b4171c3906..3d8d662e18 100644 --- a/test/cctest/test-disasm-ia32.cc +++ b/test/cctest/test-disasm-ia32.cc @@ -479,6 +479,7 @@ TEST(DisasmIa320) { // 128 bit move instructions. __ movdqa(xmm0, Operand(ebx, ecx, times_4, 10000)); __ movdqa(Operand(ebx, ecx, times_4, 10000), xmm0); + __ movdqa(xmm1, xmm0); __ movdqu(xmm0, Operand(ebx, ecx, times_4, 10000)); __ movdqu(Operand(ebx, ecx, times_4, 10000), xmm0); __ movdqu(xmm1, xmm0); @@ -645,6 +646,13 @@ TEST(DisasmIa320) { } #undef EMIT_SSE34_INSTR + { + if (CpuFeatures::IsSupported(SSE4_2)) { + CpuFeatureScope scope(&assm, SSE4_2); + __ pcmpgtq(xmm0, xmm1); + } + } + // AVX instruction { if (CpuFeatures::IsSupported(AVX)) { @@ -828,6 +836,8 @@ TEST(DisasmIa320) { __ vmovmskps(edx, xmm5); __ vpmovmskb(ebx, xmm1); + __ vpcmpgtq(xmm0, xmm1, xmm2); + #define EMIT_SSE2_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3) \ __ v##instruction(xmm7, xmm5, xmm1); \ __ v##instruction(xmm7, xmm5, Operand(edx, 4)); diff --git a/test/cctest/wasm/test-run-wasm-simd.cc b/test/cctest/wasm/test-run-wasm-simd.cc index c02848a0fa..440a558281 100644 --- a/test/cctest/wasm/test-run-wasm-simd.cc +++ b/test/cctest/wasm/test-run-wasm-simd.cc @@ -1050,7 +1050,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Ne) { RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Ne, NotEqual); } -#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 +#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32 WASM_SIMD_TEST_NO_LOWERING(I64x2LtS) { RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2LtS, Less); } @@ -1066,7 +1066,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2GtS) { WASM_SIMD_TEST_NO_LOWERING(I64x2GeS) { RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2GeS, GreaterEqual); } -#endif // V8_TARGET_ARCH_X64 || 
V8_TARGET_ARCH_ARM64 +#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32 WASM_SIMD_TEST(F64x2Splat) { WasmRunner r(execution_tier, lower_simd);