[wasm-simd][ia32] Implement i64x2 signed compares

The code sequences are the same as on x64.

Bug: v8:11415
Change-Id: I53ed2723eda29c0a250cff514372a3d45b203476
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2683495
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72637}
Authored by Ng Zhi An on 2021-02-09 13:58:04 -08:00, committed by Commit Bot
parent 2c055469d8
commit a16add806d
9 changed files with 139 additions and 4 deletions
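For reference, the operation being implemented: each instruction compares the two 64-bit lanes of its operands as signed integers and produces an all-ones lane when the comparison holds and an all-zeros lane otherwise. A minimal scalar sketch of those lane semantics (illustrative only, not part of this CL; helper names are mine):

#include <cstdint>

// Per-lane reference: all-ones on true, all-zeros on false.
uint64_t I64x2GtSLane(int64_t a, int64_t b) { return a > b ? ~uint64_t{0} : 0; }
uint64_t I64x2GeSLane(int64_t a, int64_t b) { return a >= b ? ~uint64_t{0} : 0; }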

@@ -2516,6 +2516,14 @@ void Assembler::movdqa(XMMRegister dst, Operand src) {
  emit_sse_operand(dst, src);
}

void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  EnsureSpace ensure_space(this);
  EMIT(0x66);
  EMIT(0x0F);
  EMIT(0x6F);
  emit_sse_operand(dst, src);
}

void Assembler::movdqu(Operand dst, XMMRegister src) {
  EnsureSpace ensure_space(this);
  EMIT(0xF3);
@@ -2622,6 +2630,16 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) {
  EMIT(imm8);
}

void Assembler::pcmpgtq(XMMRegister dst, XMMRegister src) {
  DCHECK(IsEnabled(SSE4_2));
  EnsureSpace ensure_space(this);
  EMIT(0x66);
  EMIT(0x0F);
  EMIT(0x38);
  EMIT(0x37);
  emit_sse_operand(dst, src);
}
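pcmpgtq (66 0F 38 37 /r) performs the per-lane signed 64-bit greater-than in hardware, which is why the SSE4.2 path in the code generator below needs only a single instruction. In intrinsics terms it corresponds to _mm_cmpgt_epi64; a hedged sketch of the equivalence (illustrative only, not part of this CL; assumes an SSE4.2-capable host):

#include <immintrin.h>

// What the emitted pcmpgtq computes: per-lane signed a > b, all-ones/all-zeros.
__m128i I64x2GtS_SSE42(__m128i a, __m128i b) {
  return _mm_cmpgt_epi64(a, b);
}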

void Assembler::psllw(XMMRegister reg, uint8_t shift) {
  EnsureSpace ensure_space(this);
  EMIT(0x66);
@@ -3150,6 +3168,10 @@ void Assembler::vextractps(Operand dst, XMMRegister src, byte imm8) {
  EMIT(imm8);
}

void Assembler::vpcmpgtq(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vinstr(0x37, dst, src1, src2, k66, k0F38, VexW::kWIG);
}

void Assembler::bmi1(byte op, Register reg, Register vreg, Operand rm) {
  DCHECK(IsEnabled(BMI1));
  EnsureSpace ensure_space(this);

@@ -990,6 +990,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
  void movdqa(XMMRegister dst, Operand src);
  void movdqa(Operand dst, XMMRegister src);
  void movdqa(XMMRegister dst, XMMRegister src);
  void movdqu(XMMRegister dst, Operand src);
  void movdqu(Operand dst, XMMRegister src);
  void movdqu(XMMRegister dst, XMMRegister src);
@@ -1016,6 +1017,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
  void extractps(Operand dst, XMMRegister src, byte imm8);
  void extractps(Register dst, XMMRegister src, byte imm8);
  void pcmpgtq(XMMRegister dst, XMMRegister src);
  void psllw(XMMRegister reg, uint8_t shift);
  void pslld(XMMRegister reg, uint8_t shift);
  void psrlw(XMMRegister reg, uint8_t shift);
@@ -1369,6 +1372,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
  void vextractps(Operand dst, XMMRegister src, byte imm8);
  void vpcmpgtq(XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); }
  void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
  void vmovapd(XMMRegister dst, XMMRegister src) { vmovapd(dst, Operand(src)); }

@@ -2319,6 +2319,69 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kIA32I64x2GtS: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src0 = i.InputSimd128Register(0);
      XMMRegister src1 = i.InputSimd128Register(1);
      if (CpuFeatures::IsSupported(AVX)) {
        CpuFeatureScope avx_scope(tasm(), AVX);
        __ vpcmpgtq(dst, src0, src1);
      } else if (CpuFeatures::IsSupported(SSE4_2)) {
        CpuFeatureScope sse_scope(tasm(), SSE4_2);
        DCHECK_EQ(dst, src0);
        __ pcmpgtq(dst, src1);
      } else {
        DCHECK_NE(dst, src0);
        DCHECK_NE(dst, src1);
        __ movdqa(dst, src1);
        __ movdqa(kScratchDoubleReg, src0);
        __ psubq(dst, src0);
        __ pcmpeqd(kScratchDoubleReg, src1);
        __ pand(dst, kScratchDoubleReg);
        __ movdqa(kScratchDoubleReg, src0);
        __ pcmpgtd(kScratchDoubleReg, src1);
        __ por(dst, kScratchDoubleReg);
        __ pshufd(dst, dst, 0xF5);
      }
      break;
    }
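The SSE2 fallback above builds the 64-bit signed compare out of 32-bit operations: src0 > src1 holds when the high dwords compare greater, or when the high dwords are equal and the 64-bit subtraction src1 - src0 borrows into its high dword; pshufd with 0xF5 then broadcasts each lane's high dword into both halves of the lane. An intrinsics sketch of the same sequence (illustrative only, not V8 code; a = src0, b = src1):

#include <emmintrin.h>

__m128i I64x2GtS_SSE2(__m128i a, __m128i b) {
  __m128i diff = _mm_sub_epi64(b, a);    // psubq: when the high dwords are equal,
                                         // the high dword of b - a is all-ones
                                         // iff the low dwords borrow (a_lo > b_lo)
  __m128i eq32 = _mm_cmpeq_epi32(a, b);  // pcmpeqd: 32-bit equality mask
  __m128i gt32 = _mm_cmpgt_epi32(a, b);  // pcmpgtd: 32-bit signed a > b
  __m128i r = _mm_or_si128(_mm_and_si128(diff, eq32), gt32);
  return _mm_shuffle_epi32(r, 0xF5);     // pshufd 0xF5: copy each lane's high
                                         // dword into both dwords of the lane
}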
    case kIA32I64x2GeS: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src0 = i.InputSimd128Register(0);
      XMMRegister src1 = i.InputSimd128Register(1);
      if (CpuFeatures::IsSupported(AVX)) {
        CpuFeatureScope avx_scope(tasm(), AVX);
        __ vpcmpgtq(dst, src1, src0);
        __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
        __ vpxor(dst, dst, kScratchDoubleReg);
      } else if (CpuFeatures::IsSupported(SSE4_2)) {
        CpuFeatureScope sse_scope(tasm(), SSE4_2);
        DCHECK_NE(dst, src0);
        if (dst != src1) {
          __ movdqa(dst, src1);
        }
        __ pcmpgtq(dst, src0);
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ pxor(dst, kScratchDoubleReg);
      } else {
        DCHECK_NE(dst, src0);
        DCHECK_NE(dst, src1);
        __ movdqa(dst, src0);
        __ movdqa(kScratchDoubleReg, src1);
        __ psubq(dst, src1);
        __ pcmpeqd(kScratchDoubleReg, src0);
        __ pand(dst, kScratchDoubleReg);
        __ movdqa(kScratchDoubleReg, src1);
        __ pcmpgtd(kScratchDoubleReg, src0);
        __ por(dst, kScratchDoubleReg);
        __ pshufd(dst, dst, 0xF5);
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ pxor(dst, kScratchDoubleReg);
      }
      break;
    }
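i64x2.ge_s is derived from the same primitive: src0 >= src1 is the complement of src1 > src0, hence the swapped comparison operands followed by an XOR with all-ones (the SSE2 tail applies the same final negation). A hedged sketch of that identity using the SSE4.2 intrinsic (illustrative only, not V8 code):

#include <immintrin.h>

__m128i I64x2GeS_SSE42(__m128i a, __m128i b) {
  __m128i gt = _mm_cmpgt_epi64(b, a);            // b > a
  return _mm_xor_si128(gt, _mm_set1_epi32(-1));  // NOT(b > a) == (a >= b)
}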
    case kIA32I64x2SConvertI32x4Low: {
      __ Pmovsxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;

@@ -150,6 +150,8 @@ namespace compiler {
  V(IA32I64x2BitMask) \
  V(IA32I64x2Eq) \
  V(IA32I64x2Ne) \
  V(IA32I64x2GtS) \
  V(IA32I64x2GeS) \
  V(IA32I64x2SignSelect) \
  V(IA32I64x2ExtMulLowI32x4S) \
  V(IA32I64x2ExtMulHighI32x4S) \

@@ -135,6 +135,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kIA32I64x2BitMask:
    case kIA32I64x2Eq:
    case kIA32I64x2Ne:
    case kIA32I64x2GtS:
    case kIA32I64x2GeS:
    case kIA32I64x2SignSelect:
    case kIA32I64x2ExtMulLowI32x4S:
    case kIA32I64x2ExtMulHighI32x4S:

@@ -3170,6 +3170,37 @@ void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
       arraysize(temps), temps);
}

void InstructionSelector::VisitI64x2GtS(Node* node) {
  IA32OperandGenerator g(this);
  if (CpuFeatures::IsSupported(AVX)) {
    Emit(kIA32I64x2GtS, g.DefineAsRegister(node),
         g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
  } else if (CpuFeatures::IsSupported(SSE4_2)) {
    Emit(kIA32I64x2GtS, g.DefineSameAsFirst(node),
         g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
  } else {
    Emit(kIA32I64x2GtS, g.DefineAsRegister(node),
         g.UseUniqueRegister(node->InputAt(0)),
         g.UseUniqueRegister(node->InputAt(1)));
  }
}

void InstructionSelector::VisitI64x2GeS(Node* node) {
  IA32OperandGenerator g(this);
  if (CpuFeatures::IsSupported(AVX)) {
    Emit(kIA32I64x2GeS, g.DefineAsRegister(node),
         g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
  } else if (CpuFeatures::IsSupported(SSE4_2)) {
    Emit(kIA32I64x2GeS, g.DefineAsRegister(node),
         g.UseUniqueRegister(node->InputAt(0)),
         g.UseRegister(node->InputAt(1)));
  } else {
    Emit(kIA32I64x2GeS, g.DefineAsRegister(node),
         g.UseUniqueRegister(node->InputAt(0)),
         g.UseUniqueRegister(node->InputAt(1)));
  }
}

// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {

@@ -2799,10 +2799,10 @@ void InstructionSelector::VisitI32x4WidenI8x16S(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4WidenI8x16U(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI64x2GtS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GeS(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }

@@ -479,6 +479,7 @@ TEST(DisasmIa320) {
  // 128 bit move instructions.
  __ movdqa(xmm0, Operand(ebx, ecx, times_4, 10000));
  __ movdqa(Operand(ebx, ecx, times_4, 10000), xmm0);
  __ movdqa(xmm1, xmm0);
  __ movdqu(xmm0, Operand(ebx, ecx, times_4, 10000));
  __ movdqu(Operand(ebx, ecx, times_4, 10000), xmm0);
  __ movdqu(xmm1, xmm0);
@@ -645,6 +646,13 @@
  }
#undef EMIT_SSE34_INSTR

  {
    if (CpuFeatures::IsSupported(SSE4_2)) {
      CpuFeatureScope scope(&assm, SSE4_2);
      __ pcmpgtq(xmm0, xmm1);
    }
  }

  // AVX instruction
  {
    if (CpuFeatures::IsSupported(AVX)) {
@@ -828,6 +836,8 @@
      __ vmovmskps(edx, xmm5);
      __ vpmovmskb(ebx, xmm1);
      __ vpcmpgtq(xmm0, xmm1, xmm2);

#define EMIT_SSE2_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3) \
  __ v##instruction(xmm7, xmm5, xmm1); \
  __ v##instruction(xmm7, xmm5, Operand(edx, 4));

@@ -1050,7 +1050,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Ne) {
  RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Ne, NotEqual);
}

#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(I64x2LtS) {
  RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2LtS, Less);
}
@@ -1066,7 +1066,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2GtS) {
WASM_SIMD_TEST_NO_LOWERING(I64x2GeS) {
  RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2GeS, GreaterEqual);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32

WASM_SIMD_TEST(F64x2Splat) {
  WasmRunner<int32_t, double> r(execution_tier, lower_simd);