[wasm-simd] Move v128.select into SharedTurboAssembler

Bug: v8:11589
Change-Id: Iaabea832006e68f9506c1e191d324cee46680e20
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2791766
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73715}
This commit is contained in:
Ng Zhi An 2021-03-29 10:10:16 -07:00 committed by Commit Bot
parent ebe13039b9
commit cfdac7f91d
8 changed files with 27 additions and 47 deletions

View File

@ -650,24 +650,6 @@ void TurboAssembler::I16x8ExtMulLow(XMMRegister dst, XMMRegister src1,
Pmullw(dst, scratch);
}
// Emits a bitwise select: dst = (src1 & mask) | (src2 & ~mask).
// |scratch| is overwritten with the (src2 & ~mask) term.
void TurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
                                XMMRegister src1, XMMRegister src2,
                                XMMRegister scratch) {
  if (!CpuFeatures::IsSupported(AVX)) {
    // The two-operand forms below overwrite their first operand, so the mask
    // has to already be in dst.
    DCHECK_EQ(dst, mask);
    // Use float ops as they are 1 byte shorter than int ops.
    movaps(scratch, dst);
    andnps(scratch, src2);
    andps(dst, src1);
    orps(dst, scratch);
    return;
  }

  // Three-operand AVX forms: dst may be any register. The andn term is
  // computed into scratch before dst is written.
  CpuFeatureScope avx_scope(this, AVX);
  vpandn(scratch, mask, src2);
  vpand(dst, src1, mask);
  vpor(dst, dst, scratch);
}
void TurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1,
XMMRegister src2, XMMRegister scratch) {
// k = i16x8.splat(0x8000)

View File

@ -714,9 +714,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
// Defined here to allow usage on both TurboFan and Liftoff.
void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scratch, bool is_signed);
// Bitwise select: dst = (src1 & mask) | (src2 & ~mask). Overwrites scratch.
// Requires dst == mask when AVX is not supported.
void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
XMMRegister src2, XMMRegister scratch);
void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scratch);
void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);

View File

@ -350,5 +350,25 @@ void SharedTurboAssembler::I64x2UConvertI32x4High(XMMRegister dst,
}
}
// Emits v128.select: every bit of dst comes from src1 where the matching bit
// of mask is set and from src2 where it is clear. |scratch| is overwritten.
void SharedTurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
                                      XMMRegister src1, XMMRegister src2,
                                      XMMRegister scratch) {
  // v128.select = v128.or(v128.and(v1, c), v128.andnot(v2, c)).
  // pandn(x, y) = !x & y, so we have to flip the mask and input.
  const bool has_avx = CpuFeatures::IsSupported(AVX);
  if (!has_avx) {
    // Without AVX the result is built in place on top of the mask.
    DCHECK_EQ(dst, mask);
    // Use float ops as they are 1 byte shorter than int ops.
    movaps(scratch, mask);
    andnps(scratch, src2);
    andps(dst, src1);
    orps(dst, scratch);
    return;
  }

  CpuFeatureScope scope(this, AVX);
  // scratch = ~mask & src2, computed before dst is written.
  vpandn(scratch, mask, src2);
  // dst = src1 & mask.
  vpand(dst, src1, mask);
  vpor(dst, dst, scratch);
}
} // namespace internal
} // namespace v8

View File

@ -46,6 +46,9 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
void I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src);
void I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src,
XMMRegister scratch);
// Bitwise select: dst = (src1 & mask) | (src2 & ~mask). Overwrites scratch.
// Requires dst == mask when AVX is not supported.
void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
XMMRegister src2, XMMRegister scratch);
};
} // namespace internal
} // namespace v8

View File

@ -2433,25 +2433,6 @@ void TurboAssembler::Psrld(XMMRegister dst, XMMRegister src, byte imm8) {
}
}
// Emits v128.select into dst, taking bits from src1 where mask is set and from
// src2 where it is clear. kScratchDoubleReg is overwritten.
void TurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
                                XMMRegister src1, XMMRegister src2) {
  // v128.select = v128.or(v128.and(v1, c), v128.andnot(v2, c)).
  // pandn(x, y) = !x & y, so we have to flip the mask and input.
  if (!CpuFeatures::IsSupported(AVX)) {
    // The non-AVX sequence builds the result in place over the mask.
    DCHECK_EQ(dst, mask);
    // Use float ops as they are 1 byte shorter than int ops.
    movaps(kScratchDoubleReg, mask);
    andnps(kScratchDoubleReg, src2);
    andps(dst, src1);
    orps(dst, kScratchDoubleReg);
    return;
  }

  CpuFeatureScope scope(this, AVX);
  // The andn term goes to the scratch register before dst is written.
  vpandn(kScratchDoubleReg, mask, src2);
  vpand(dst, src1, mask);
  vpor(dst, dst, kScratchDoubleReg);
}
void TurboAssembler::Lzcntl(Register dst, Register src) {
if (CpuFeatures::IsSupported(LZCNT)) {
CpuFeatureScope scope(this, LZCNT);

View File

@ -608,10 +608,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
// helpers are optimized to produce the fastest and smallest codegen.
// Defined here to allow usage on both TurboFan and Liftoff.
// Bitwise select: dst = (src1 & mask) | (src2 & ~mask). Overwrites
// kScratchDoubleReg.
// Requires dst == mask when AVX is not supported.
void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
XMMRegister src2);
// TODO(zhin): Move this into shared-ia32-x64-macro-assembler.
void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
bool is_signed);

View File

@ -3685,7 +3685,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64S128Select: {
__ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), i.InputSimd128Register(2));
i.InputSimd128Register(1), i.InputSimd128Register(2),
kScratchDoubleReg);
break;
}
case kX64S128AndNot: {

View File

@ -2882,9 +2882,9 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
DCHECK_NE(dst, src2);
if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
movaps(dst.fp(), mask.fp());
S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp());
S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
} else {
S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp());
S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
}
}