Reland "[wasm-simd][liftoff][x64] Move v128.select into macro-assembler"

This is a reland of 2d5f981a04

The fix is in liftoff-assembler-x64, to call S128Select with dst
as the mask when AVX is not supported and dst != mask.

Original change's description:
> [wasm-simd][liftoff][x64] Move v128.select into macro-assembler
>
> This allows us to reuse this optimized code sequence in Liftoff.
>
> We can't do the same thing in IA32 yet, there is no kScratchDoubleReg
> defined in the macro-assembler-ia32.cc, it is defined in code-generator-ia32
> as xmm0 but I'm not sure if it is safe to just use that in the macro assembler.
>
> Change-Id: I6c761857c49d2518fbc82cd0796c62fc86665cb5
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2596581
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Clemens Backes <clemensb@chromium.org>
> Reviewed-by: Bill Budge <bbudge@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#71915}

Change-Id: Ib96ce0e1d5762f6513ef87f240b25ef3ae59441f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2612324
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71961}
This commit is contained in:
Zhi An Ng 2021-01-06 01:28:15 +00:00 committed by Commit Bot
parent 2893b9fbd6
commit 025443a4a8
4 changed files with 33 additions and 29 deletions

View File

@ -2214,6 +2214,25 @@ void TurboAssembler::Psrld(XMMRegister dst, XMMRegister src, byte imm8) {
}
}
void TurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
XMMRegister src1, XMMRegister src2) {
// v128.select = v128.or(v128.and(v1, c), v128.andnot(v2, c)).
// pandn(x, y) = !x & y, so we have to flip the mask and input.
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpandn(kScratchDoubleReg, mask, src2);
vpand(dst, src1, mask);
vpor(dst, dst, kScratchDoubleReg);
} else {
DCHECK_EQ(dst, mask);
// Use float ops as they are 1 byte shorter than int ops.
movaps(kScratchDoubleReg, mask);
andnps(kScratchDoubleReg, src2);
andps(dst, src1);
orps(dst, kScratchDoubleReg);
}
}
void TurboAssembler::Lzcntl(Register dst, Register src) {
if (CpuFeatures::IsSupported(LZCNT)) {
CpuFeatureScope scope(this, LZCNT);

View File

@ -578,6 +578,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void I32x4SConvertI16x8High(XMMRegister dst, XMMRegister src);
void I32x4UConvertI16x8High(XMMRegister dst, XMMRegister src);
// Requires dst == mask when AVX is not supported.
void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
XMMRegister src2);
void I64x2ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2,
bool low, bool is_signed);
// Requires that dst == src1 if AVX is not supported.

View File

@ -3762,25 +3762,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64S128Select: {
// v128.select = v128.or(v128.and(v1, c), v128.andnot(v2, c)).
// pandn(x, y) = !x & y, so we have to flip the mask and input.
XMMRegister dst = i.OutputSimd128Register();
XMMRegister mask = i.InputSimd128Register(0);
XMMRegister src1 = i.InputSimd128Register(1);
XMMRegister src2 = i.InputSimd128Register(2);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpandn(kScratchDoubleReg, mask, src2);
__ vpand(dst, src1, mask);
__ vpor(dst, dst, kScratchDoubleReg);
} else {
DCHECK_EQ(dst, mask);
// Use float ops as they are 1 byte shorter than int ops.
__ movaps(kScratchDoubleReg, mask);
__ andnps(kScratchDoubleReg, src2);
__ andps(dst, src1);
__ orps(dst, kScratchDoubleReg);
}
__ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), i.InputSimd128Register(2));
break;
}
case kX64S128AndNot: {

View File

@ -7,6 +7,7 @@
#include "src/base/platform/wrappers.h"
#include "src/codegen/assembler.h"
#include "src/codegen/cpu-features.h"
#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"
#include "src/wasm/simd-shuffle.h"
@ -2756,17 +2757,14 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vxorps(kScratchDoubleReg, src1.fp(), src2.fp());
vandps(kScratchDoubleReg, kScratchDoubleReg, mask.fp());
vxorps(dst.fp(), kScratchDoubleReg, src2.fp());
// Ensure that we don't overwrite any inputs with the movdqu below.
DCHECK_NE(dst, src1);
DCHECK_NE(dst, src2);
if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
movdqu(dst.fp(), mask.fp());
S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp());
} else {
movaps(kScratchDoubleReg, src1.fp());
xorps(kScratchDoubleReg, src2.fp());
andps(kScratchDoubleReg, mask.fp());
if (dst.fp() != src2.fp()) movaps(dst.fp(), src2.fp());
xorps(dst.fp(), kScratchDoubleReg);
S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp());
}
}