[wasm-simd][x64] Fix F64x2ConvertLowI32x4U isel and codegen

The previous instruction selection was too loose: it only required
registers for the inputs. The codegen also used Unpcklps(dst, mask) and
failed to use src at all. The test case was passing only by accident,
because dst == src (xmm0) by chance.

We fix this bug by requiring dst == src for SSE, while for AVX any
register is fine. Also redefine Unpcklps to check dst == src in the
non-AVX case.

Bug: v8:11265
Change-Id: I1988b2d2da8263512bf6e675e6297c50f55663f7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2668918
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72536}
This commit is contained in:
Ng Zhi An 2021-02-02 16:31:02 -08:00 committed by Commit Bot
parent fec9c5d655
commit 5ce3afe2d7
4 changed files with 25 additions and 5 deletions

View File

@ -1825,6 +1825,16 @@ void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1,
}
}
// Emits an unpcklps of src1 and src2 into dst, picking the AVX encoding
// (vunpcklps) when the CPU supports it. The SSE form only takes dst and src2,
// so dst doubles as the first source and callers must pass dst == src1.
void TurboAssembler::Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2) {
  if (!CpuFeatures::IsSupported(AVX)) {
    // SSE fallback: src1 is never read separately, so it has to alias dst.
    DCHECK_EQ(dst, src1);
    unpcklps(dst, src2);
    return;
  }

  // AVX path: three-operand form, dst may be any register.
  CpuFeatureScope avx_scope(this, AVX);
  vunpcklps(dst, src1, src2);
}
void TurboAssembler::Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
byte imm8) {
if (CpuFeatures::IsSupported(AVX)) {

View File

@ -162,7 +162,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Addss, addss)
AVX_OP(Addsd, addsd)
AVX_OP(Mulsd, mulsd)
AVX_OP(Unpcklps, unpcklps)
AVX_OP(Andps, andps)
AVX_OP(Andnps, andnps)
AVX_OP(Andpd, andpd)
@ -542,6 +541,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Pmaddubsw(XMMRegister dst, XMMRegister src1, Operand src2);
void Pmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2);
// Unpcklps that requires dst == src1 if AVX is not supported.
void Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2);
// Shufps that will mov src1 into dst if AVX is not supported.
void Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8);

View File

@ -2483,11 +2483,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64F64x2ConvertLowI32x4U: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
// dst = [ src_low, 0x43300000, src_high, 0x43300000 ];
// 0x43300000'00000000 is a special double where the significand bits
// precisely represents all uint32 numbers.
__ Unpcklps(
dst, __ ExternalReferenceAsOperand(
dst, src,
__ ExternalReferenceAsOperand(
ExternalReference::
address_of_wasm_f64x2_convert_low_i32x4_u_int_mask()));
__ Subpd(dst,

View File

@ -10,7 +10,9 @@
#include "src/base/platform/wrappers.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/machine-type.h"
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/backend/instruction.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
@ -2931,7 +2933,6 @@ VISIT_ATOMIC_BINOP(Xor)
#define SIMD_UNOP_LIST(V) \
V(F64x2Sqrt) \
V(F64x2ConvertLowI32x4S) \
V(F64x2ConvertLowI32x4U) \
V(F64x2PromoteLowF32x4) \
V(F32x4SConvertI32x4) \
V(F32x4Abs) \
@ -3726,6 +3727,13 @@ void InstructionSelector::VisitI8x16Popcnt(Node* node) {
arraysize(temps), temps);
}
// Selects kX64F64x2ConvertLowI32x4U for |node|. With AVX the result may live
// in any register; without AVX the result is constrained to be the same
// register as the first input.
void InstructionSelector::VisitF64x2ConvertLowI32x4U(Node* node) {
  X64OperandGenerator g(this);
  const bool has_avx = IsSupported(AVX);
  // Define the output before creating the use, as the constraint on the
  // output depends on the available instruction encoding.
  InstructionOperand result =
      has_avx ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
  InstructionOperand value = g.UseRegister(node->InputAt(0));
  Emit(kX64F64x2ConvertLowI32x4U, result, value);
}
void InstructionSelector::VisitI32x4TruncSatF64x2SZero(Node* node) {
X64OperandGenerator g(this);
if (CpuFeatures::IsSupported(AVX)) {