[wasm-simd] Move v128.select into SharedTurboAssembler
Bug: v8:11589
Change-Id: Iaabea832006e68f9506c1e191d324cee46680e20
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2791766
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73715}
parent: ebe13039b9
commit: cfdac7f91d
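For context, v128.select is a bitwise select: each result bit comes from src1 where the mask bit is set and from src2 where it is clear, i.e. (src1 & mask) | (src2 & ~mask). A minimal scalar model in C++, illustrative only (the helper name and layout are made up, not part of this commit):

// Scalar sketch of wasm v128.select over two 64-bit halves (hypothetical).
#include <array>
#include <cstdint>

using V128 = std::array<uint64_t, 2>;

V128 S128SelectModel(V128 src1, V128 src2, V128 mask) {
  V128 dst;
  for (int i = 0; i < 2; ++i) {
    // x86's pandn computes ~x & y (it inverts its *first* operand), which is
    // why the assembler code below flips the operand order:
    // pandn(mask, src2) == ~mask & src2.
    dst[i] = (src1[i] & mask[i]) | (~mask[i] & src2[i]);
  }
  return dst;
}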
@@ -650,24 +650,6 @@ void TurboAssembler::I16x8ExtMulLow(XMMRegister dst, XMMRegister src1,
   Pmullw(dst, scratch);
 }
 
-void TurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
-                                XMMRegister src1, XMMRegister src2,
-                                XMMRegister scratch) {
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope scope(this, AVX);
-    vpandn(scratch, mask, src2);
-    vpand(dst, src1, mask);
-    vpor(dst, dst, scratch);
-  } else {
-    DCHECK_EQ(dst, mask);
-    // Use float ops as they are 1 byte shorter than int ops.
-    movaps(scratch, dst);
-    andnps(scratch, src2);
-    andps(dst, src1);
-    orps(dst, scratch);
-  }
-}
-
 void TurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1,
                                       XMMRegister src2, XMMRegister scratch) {
   // k = i16x8.splat(0x8000)
@@ -714,9 +714,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
   // Defined here to allow usage on both TurboFan and Liftoff.
   void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                       XMMRegister scratch, bool is_signed);
-  // Requires dst == mask when AVX is not supported.
-  void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
-                  XMMRegister src2, XMMRegister scratch);
   void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                         XMMRegister scratch);
   void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
@@ -350,5 +350,25 @@ void SharedTurboAssembler::I64x2UConvertI32x4High(XMMRegister dst,
   }
 }
 
+void SharedTurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
+                                      XMMRegister src1, XMMRegister src2,
+                                      XMMRegister scratch) {
+  // v128.select = v128.or(v128.and(v1, c), v128.andnot(v2, c)).
+  // pandn(x, y) = !x & y, so we have to flip the mask and input.
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope avx_scope(this, AVX);
+    vpandn(scratch, mask, src2);
+    vpand(dst, src1, mask);
+    vpor(dst, dst, scratch);
+  } else {
+    DCHECK_EQ(dst, mask);
+    // Use float ops as they are 1 byte shorter than int ops.
+    movaps(scratch, mask);
+    andnps(scratch, src2);
+    andps(dst, src1);
+    orps(dst, scratch);
+  }
+}
+
 }  // namespace internal
 }  // namespace v8
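The SSE fallback above uses destructive two-operand instructions: andps(dst, src1) overwrites dst, so the mask must already live in dst (hence DCHECK_EQ(dst, mask)), and its bits are first saved into scratch for the andnps half. The float forms are picked purely for encoding size, as the in-code comment notes. A sketch of that register discipline on plain integers, assuming nothing beyond what the hunk shows:

// Models the non-AVX sequence on uint64_t halves (illustrative only).
#include <array>
#include <cstdint>

using V128 = std::array<uint64_t, 2>;

V128 S128SelectSseModel(V128 dst /* must hold the mask on entry */,
                        V128 src1, V128 src2) {
  V128 scratch;
  for (int i = 0; i < 2; ++i) {
    scratch[i] = dst[i];                 // movaps(scratch, mask)
    scratch[i] = ~scratch[i] & src2[i];  // andnps(scratch, src2)
    dst[i] &= src1[i];                   // andps(dst, src1)
    dst[i] |= scratch[i];                // orps(dst, scratch)
  }
  return dst;  // == (mask & src1) | (~mask & src2)
}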
@@ -46,6 +46,9 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
   void I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src);
   void I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src,
                               XMMRegister scratch);
+  // Requires dst == mask when AVX is not supported.
+  void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
+                  XMMRegister src2, XMMRegister scratch);
 };
 }  // namespace internal
 }  // namespace v8
@@ -2433,25 +2433,6 @@ void TurboAssembler::Psrld(XMMRegister dst, XMMRegister src, byte imm8) {
   }
 }
 
-void TurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
-                                XMMRegister src1, XMMRegister src2) {
-  // v128.select = v128.or(v128.and(v1, c), v128.andnot(v2, c)).
-  // pandn(x, y) = !x & y, so we have to flip the mask and input.
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope avx_scope(this, AVX);
-    vpandn(kScratchDoubleReg, mask, src2);
-    vpand(dst, src1, mask);
-    vpor(dst, dst, kScratchDoubleReg);
-  } else {
-    DCHECK_EQ(dst, mask);
-    // Use float ops as they are 1 byte shorter than int ops.
-    movaps(kScratchDoubleReg, mask);
-    andnps(kScratchDoubleReg, src2);
-    andps(dst, src1);
-    orps(dst, kScratchDoubleReg);
-  }
-}
-
 void TurboAssembler::Lzcntl(Register dst, Register src) {
   if (CpuFeatures::IsSupported(LZCNT)) {
     CpuFeatureScope scope(this, LZCNT);
@@ -608,10 +608,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
   // helpers are optimized to produce the fastest and smallest codegen.
   // Defined here to allow usage on both TurboFan and Liftoff.
 
-  // Requires dst == mask when AVX is not supported.
-  void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
-                  XMMRegister src2);
-
   // TODO(zhin): Move this into shared-ia32-x64-macro-assembler.
   void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                       bool is_signed);
@@ -3685,7 +3685,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kX64S128Select: {
       __ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
-                    i.InputSimd128Register(1), i.InputSimd128Register(2));
+                    i.InputSimd128Register(1), i.InputSimd128Register(2),
+                    kScratchDoubleReg);
       break;
     }
     case kX64S128AndNot: {
@@ -2882,9 +2882,9 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
   DCHECK_NE(dst, src2);
   if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
     movaps(dst.fp(), mask.fp());
-    S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp());
+    S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
   } else {
-    S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp());
+    S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
   }
 }
 
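Note the signature change the last two hunks adapt to: the old x64 helper baked kScratchDoubleReg into its body, but SharedTurboAssembler serves both ia32 and x64, so the scratch register is now an explicit parameter and each x64 caller passes kScratchDoubleReg itself. A hypothetical call site (the wrapper name is illustrative; the types and contract come from the diff):

// Sketch of calling the shared helper; the DCHECK restates the header's
// "dst == mask when AVX is not supported" contract.
void EmitSelect(SharedTurboAssembler* masm, XMMRegister dst, XMMRegister mask,
                XMMRegister src1, XMMRegister src2) {
  DCHECK(CpuFeatures::IsSupported(AVX) || dst == mask);
  masm->S128Select(dst, mask, src1, src2, kScratchDoubleReg);
}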