Reland "[wasm-simd][liftoff][x64] Move v128.select into macro-assembler"
This is a reland of 2d5f981a04
The fix is in liftoff-assembler-x64: when AVX is not supported and
dst != mask, copy the mask into dst and call S128Select with dst as the mask.
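As background, v128.select is a bitwise blend; a minimal scalar model of one
64-bit lane (illustrative only, not part of this change) is:

    #include <cstdint>

    // One 64-bit lane of v128.select: each result bit comes from v1 where the
    // corresponding mask bit is 1, and from v2 where it is 0.
    uint64_t SelectLane(uint64_t mask, uint64_t v1, uint64_t v2) {
      return (v1 & mask) | (v2 & ~mask);
    }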
Original change's description:
> [wasm-simd][liftoff][x64] Move v128.select into macro-assembler
>
> This allows us to reuse this optimized code sequence in Liftoff.
>
> We can't do the same thing on IA32 yet: there is no kScratchDoubleReg
> defined in macro-assembler-ia32.cc. It is defined in code-generator-ia32
> as xmm0, but I'm not sure it is safe to just use that in the macro assembler.
>
> Change-Id: I6c761857c49d2518fbc82cd0796c62fc86665cb5
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2596581
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Clemens Backes <clemensb@chromium.org>
> Reviewed-by: Bill Budge <bbudge@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#71915}
Change-Id: Ib96ce0e1d5762f6513ef87f240b25ef3ae59441f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2612324
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71961}
parent 2893b9fbd6
commit 025443a4a8
@@ -2214,6 +2214,25 @@ void TurboAssembler::Psrld(XMMRegister dst, XMMRegister src, byte imm8) {
   }
 }
 
+void TurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
+                                XMMRegister src1, XMMRegister src2) {
+  // v128.select = v128.or(v128.and(v1, c), v128.andnot(v2, c)).
+  // pandn(x, y) = !x & y, so we have to flip the mask and input.
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope avx_scope(this, AVX);
+    vpandn(kScratchDoubleReg, mask, src2);
+    vpand(dst, src1, mask);
+    vpor(dst, dst, kScratchDoubleReg);
+  } else {
+    DCHECK_EQ(dst, mask);
+    // Use float ops as they are 1 byte shorter than int ops.
+    movaps(kScratchDoubleReg, mask);
+    andnps(kScratchDoubleReg, src2);
+    andps(dst, src1);
+    orps(dst, kScratchDoubleReg);
+  }
+}
+
 void TurboAssembler::Lzcntl(Register dst, Register src) {
   if (CpuFeatures::IsSupported(LZCNT)) {
     CpuFeatureScope scope(this, LZCNT);
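The non-AVX branch above is built from destructive two-operand SSE
instructions, which is why the mask has to start out in dst; a scalar sketch
of that register discipline (illustrative only, one 64-bit lane, not V8 code)
is:

    #include <cstdint>

    // Scalar model of the non-AVX S128Select sequence. 'scratch' stands in
    // for kScratchDoubleReg, and 'dst' must hold the mask on entry, which is
    // what DCHECK_EQ(dst, mask) enforces.
    uint64_t SseSelectLane(uint64_t dst /* == mask */, uint64_t src1,
                           uint64_t src2) {
      uint64_t scratch = dst;     // movaps(kScratchDoubleReg, mask)
      scratch = ~scratch & src2;  // andnps(kScratchDoubleReg, src2)
      dst &= src1;                // andps(dst, src1)
      dst |= scratch;             // orps(dst, kScratchDoubleReg)
      return dst;                 // (src1 & mask) | (src2 & ~mask)
    }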
@@ -578,6 +578,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   void I32x4SConvertI16x8High(XMMRegister dst, XMMRegister src);
   void I32x4UConvertI16x8High(XMMRegister dst, XMMRegister src);
 
+  // Requires dst == mask when AVX is not supported.
+  void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
+                  XMMRegister src2);
+
   void I64x2ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                    bool low, bool is_signed);
   // Requires that dst == src1 if AVX is not supported.
@@ -3762,25 +3762,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kX64S128Select: {
-      // v128.select = v128.or(v128.and(v1, c), v128.andnot(v2, c)).
-      // pandn(x, y) = !x & y, so we have to flip the mask and input.
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister mask = i.InputSimd128Register(0);
-      XMMRegister src1 = i.InputSimd128Register(1);
-      XMMRegister src2 = i.InputSimd128Register(2);
-      if (CpuFeatures::IsSupported(AVX)) {
-        CpuFeatureScope avx_scope(tasm(), AVX);
-        __ vpandn(kScratchDoubleReg, mask, src2);
-        __ vpand(dst, src1, mask);
-        __ vpor(dst, dst, kScratchDoubleReg);
-      } else {
-        DCHECK_EQ(dst, mask);
-        // Use float ops as they are 1 byte shorter than int ops.
-        __ movaps(kScratchDoubleReg, mask);
-        __ andnps(kScratchDoubleReg, src2);
-        __ andps(dst, src1);
-        __ orps(dst, kScratchDoubleReg);
-      }
+      __ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                    i.InputSimd128Register(1), i.InputSimd128Register(2));
       break;
     }
     case kX64S128AndNot: {
@@ -7,6 +7,7 @@
 
 #include "src/base/platform/wrappers.h"
 #include "src/codegen/assembler.h"
+#include "src/codegen/cpu-features.h"
 #include "src/heap/memory-chunk.h"
 #include "src/wasm/baseline/liftoff-assembler.h"
 #include "src/wasm/simd-shuffle.h"
@@ -2756,17 +2757,14 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                         LiftoffRegister src1,
                                         LiftoffRegister src2,
                                         LiftoffRegister mask) {
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope scope(this, AVX);
-    vxorps(kScratchDoubleReg, src1.fp(), src2.fp());
-    vandps(kScratchDoubleReg, kScratchDoubleReg, mask.fp());
-    vxorps(dst.fp(), kScratchDoubleReg, src2.fp());
+  // Ensure that we don't overwrite any inputs with the movdqu below.
+  DCHECK_NE(dst, src1);
+  DCHECK_NE(dst, src2);
+  if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
+    movdqu(dst.fp(), mask.fp());
+    S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp());
   } else {
-    movaps(kScratchDoubleReg, src1.fp());
-    xorps(kScratchDoubleReg, src2.fp());
-    andps(kScratchDoubleReg, mask.fp());
-    if (dst.fp() != src2.fp()) movaps(dst.fp(), src2.fp());
-    xorps(dst.fp(), kScratchDoubleReg);
+    S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp());
   }
 }
 