From 5ce3afe2d725e88a8b1be9ac6dbcbfe27792c417 Mon Sep 17 00:00:00 2001 From: Ng Zhi An Date: Tue, 2 Feb 2021 16:31:02 -0800 Subject: [PATCH] [wasm-simd][x64] Fix F64x2ConvertLowI32x4U isel and codegen The previous instruction selection was too loose, it only required registers for the inputs. The codegen also used Unpcklps(dst, mask), and failed to use src at all. The test case was accidentally passing because dst == src (xmm0) by chance. We fix this bug requiring that for AVX, any register is fine, but for SSE, require dst == src. Also redefine Unpcklps to check dst == src in the no AVX case. Bug: v8:11265 Change-Id: I1988b2d2da8263512bf6e675e6297c50f55663f7 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2668918 Reviewed-by: Deepti Gandluri Commit-Queue: Zhi An Ng Cr-Commit-Position: refs/heads/master@{#72536} --- src/codegen/x64/macro-assembler-x64.cc | 10 ++++++++++ src/codegen/x64/macro-assembler-x64.h | 2 +- src/compiler/backend/x64/code-generator-x64.cc | 8 +++++--- src/compiler/backend/x64/instruction-selector-x64.cc | 10 +++++++++- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/codegen/x64/macro-assembler-x64.cc b/src/codegen/x64/macro-assembler-x64.cc index 06aba6d2fe..b8c598ef6c 100644 --- a/src/codegen/x64/macro-assembler-x64.cc +++ b/src/codegen/x64/macro-assembler-x64.cc @@ -1825,6 +1825,16 @@ void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1, } } +void TurboAssembler::Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2) { + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope avx_scope(this, AVX); + vunpcklps(dst, src1, src2); + } else { + DCHECK_EQ(dst, src1); + unpcklps(dst, src2); + } +} + void TurboAssembler::Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) { if (CpuFeatures::IsSupported(AVX)) { diff --git a/src/codegen/x64/macro-assembler-x64.h b/src/codegen/x64/macro-assembler-x64.h index c4636f6f18..3ee134da0c 100644 --- a/src/codegen/x64/macro-assembler-x64.h +++ b/src/codegen/x64/macro-assembler-x64.h @@ -162,7 +162,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { AVX_OP(Addss, addss) AVX_OP(Addsd, addsd) AVX_OP(Mulsd, mulsd) - AVX_OP(Unpcklps, unpcklps) AVX_OP(Andps, andps) AVX_OP(Andnps, andnps) AVX_OP(Andpd, andpd) @@ -542,6 +541,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { void Pmaddubsw(XMMRegister dst, XMMRegister src1, Operand src2); void Pmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2); + void Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2); // Shufps that will mov src1 into dst if AVX is not supported. void Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8); diff --git a/src/compiler/backend/x64/code-generator-x64.cc b/src/compiler/backend/x64/code-generator-x64.cc index 93cee70650..f1ec75cc08 100644 --- a/src/compiler/backend/x64/code-generator-x64.cc +++ b/src/compiler/backend/x64/code-generator-x64.cc @@ -2483,13 +2483,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64F64x2ConvertLowI32x4U: { XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(0); // dst = [ src_low, 0x43300000, src_high, 0x4330000 ]; // 0x43300000'00000000 is a special double where the significand bits // precisely represents all uint32 numbers. __ Unpcklps( - dst, __ ExternalReferenceAsOperand( - ExternalReference:: - address_of_wasm_f64x2_convert_low_i32x4_u_int_mask())); + dst, src, + __ ExternalReferenceAsOperand( + ExternalReference:: + address_of_wasm_f64x2_convert_low_i32x4_u_int_mask())); __ Subpd(dst, __ ExternalReferenceAsOperand( ExternalReference::address_of_wasm_double_2_power_52())); diff --git a/src/compiler/backend/x64/instruction-selector-x64.cc b/src/compiler/backend/x64/instruction-selector-x64.cc index c55e19bb8d..8bbb1712ee 100644 --- a/src/compiler/backend/x64/instruction-selector-x64.cc +++ b/src/compiler/backend/x64/instruction-selector-x64.cc @@ -10,7 +10,9 @@ #include "src/base/platform/wrappers.h" #include "src/codegen/cpu-features.h" #include "src/codegen/machine-type.h" +#include "src/compiler/backend/instruction-codes.h" #include "src/compiler/backend/instruction-selector-impl.h" +#include "src/compiler/backend/instruction.h" #include "src/compiler/machine-operator.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties.h" @@ -2931,7 +2933,6 @@ VISIT_ATOMIC_BINOP(Xor) #define SIMD_UNOP_LIST(V) \ V(F64x2Sqrt) \ V(F64x2ConvertLowI32x4S) \ - V(F64x2ConvertLowI32x4U) \ V(F64x2PromoteLowF32x4) \ V(F32x4SConvertI32x4) \ V(F32x4Abs) \ @@ -3726,6 +3727,13 @@ void InstructionSelector::VisitI8x16Popcnt(Node* node) { arraysize(temps), temps); } +void InstructionSelector::VisitF64x2ConvertLowI32x4U(Node* node) { + X64OperandGenerator g(this); + InstructionOperand dst = + IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node); + Emit(kX64F64x2ConvertLowI32x4U, dst, g.UseRegister(node->InputAt(0))); +} + void InstructionSelector::VisitI32x4TruncSatF64x2SZero(Node* node) { X64OperandGenerator g(this); if (CpuFeatures::IsSupported(AVX)) {