[wasm-simd][liftoff][ia32] Implement saturating rounding multiply high
Create a macro-assembler helper function to enable code sharing between Liftoff and TurboFan. Bug: v8:10971 Change-Id: I8d8132f4cf3386b28cdf5350fde2e076428d68c6 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2621860 Commit-Queue: Zhi An Ng <zhin@chromium.org> Reviewed-by: Clemens Backes <clemensb@chromium.org> Reviewed-by: Bill Budge <bbudge@chromium.org> Cr-Commit-Position: refs/heads/master@{#72067}
This commit is contained in:
parent
bbcb20a13e
commit
e94084f6af
@ -650,6 +650,20 @@ void TurboAssembler::Roundpd(XMMRegister dst, XMMRegister src,
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a packed "multiply high with round and scale" of src1 and src2 into
// dst. With AVX the non-destructive three-operand vpmulhrsw is used. Without
// AVX the two-operand SSSE3 pmulhrsw is used, which overwrites its first
// operand, so dst must hold one of the inputs before the multiply.
//
// dst may alias src1, src2, or both.
void TurboAssembler::Pmulhrsw(XMMRegister dst, XMMRegister src1,
                              XMMRegister src2) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vpmulhrsw(dst, src1, src2);
    return;
  }

  CpuFeatureScope sse_scope(this, SSSE3);
  if (dst != src2) {
    // Bring src1 into dst (no-op when they already coincide), then multiply
    // by src2 in place.
    if (dst != src1) {
      movdqu(dst, src1);
    }
    pmulhrsw(dst, src2);
  } else {
    // dst already holds src2. Copying src1 into dst here would destroy src2
    // before it is read; the operation is commutative, so multiply dst by
    // src1 in place instead.
    pmulhrsw(dst, src1);
  }
}
|
||||
|
||||
// 1. Unpack src0, src1 into even-number elements of scratch.
|
||||
// 2. Unpack src1, src0 into even-number elements of dst.
|
||||
// 3. Multiply 1. with 2.
|
||||
@ -763,6 +777,17 @@ void TurboAssembler::I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src,
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the i16x8 Q15 rounding, saturating multiply of src1 and src2 into dst.
//
// Pmulhrsw produces the rounded Q15 product but does not saturate; lanes that
// come out as 0x8000 are flipped to 0x7FFF afterwards (per the PMULHRSW
// definition this only occurs for 0x8000 * 0x8000).
//
// scratch is clobbered. It is written before src1/src2 are read and is still
// live after dst is written, so it must be distinct from all three.
void TurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1,
                                      XMMRegister src2, XMMRegister scratch) {
  // scratch = i16x8.splat(0x8000): all-ones, then shift each 16-bit lane left
  // by 15 so that only the sign bit remains.
  Pcmpeqd(scratch, scratch);
  Psllw(scratch, scratch, byte{15});

  // dst = rounded high product, not yet saturated.
  Pmulhrsw(dst, src1, src2);

  // scratch = 0xFFFF in every lane where dst == 0x8000, zero elsewhere.
  Pcmpeqw(scratch, dst);

  // Flip the marked lanes: 0x8000 ^ 0xFFFF == 0x7FFF. Other lanes are
  // xor-ed with zero and stay unchanged.
  Pxor(dst, scratch);
}
|
||||
|
||||
void TurboAssembler::ShlPair(Register high, Register low, uint8_t shift) {
|
||||
DCHECK_GE(63, shift);
|
||||
if (shift >= 32) {
|
||||
|
@ -527,7 +527,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
|
||||
AVX_OP3_XO_SSE4(Pmaxsd, pmaxsd)
|
||||
AVX_OP3_WITH_TYPE_SCOPE(Pmaddubsw, pmaddubsw, XMMRegister, XMMRegister, SSSE3)
|
||||
AVX_OP3_WITH_TYPE_SCOPE(Pmulhrsw, pmulhrsw, XMMRegister, XMMRegister, SSSE3)
|
||||
|
||||
#undef AVX_OP3_XO_SSE4
|
||||
#undef AVX_OP3_WITH_TYPE_SCOPE
|
||||
@ -612,6 +611,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
void Roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
|
||||
void Roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
|
||||
|
||||
// Handles SSE and AVX. On SSE, moves src1 to dst when required, since the SSE instruction overwrites its first operand.
|
||||
void Pmulhrsw(XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
|
||||
// These Wasm SIMD ops do not have direct lowerings on IA32. These
|
||||
// helpers are optimized to produce the fastest and smallest codegen.
|
||||
// Defined here to allow usage on both TurboFan and Liftoff.
|
||||
@ -628,6 +630,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
void I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src);
|
||||
void I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src,
|
||||
XMMRegister scratch);
|
||||
void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
|
||||
XMMRegister scratch);
|
||||
|
||||
void Push(Register src) { push(src); }
|
||||
void Push(Operand src) { push(src); }
|
||||
|
@ -2358,16 +2358,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
}
|
||||
case kIA32I16x8Q15MulRSatS: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src0 = i.InputSimd128Register(0);
|
||||
XMMRegister src1 = i.InputSimd128Register(1);
|
||||
// k = i16x8.splat(0x8000)
|
||||
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ Psllw(kScratchDoubleReg, kScratchDoubleReg, byte{15});
|
||||
|
||||
__ Pmulhrsw(dst, src0, src1);
|
||||
__ Pcmpeqw(kScratchDoubleReg, dst);
|
||||
__ Pxor(dst, kScratchDoubleReg);
|
||||
__ I16x8Q15MulRSatS(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputSimd128Register(1), kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kIA32I32x4SignSelect: {
|
||||
|
@ -3577,7 +3577,7 @@ void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
|
||||
// Liftoff lowering of i16x8.q15mulr_sat_s. Delegates to the macro-assembler
// helper I16x8Q15MulRSatS, passing Liftoff's kScratchDoubleReg as the
// temporary that the helper clobbers.
void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
                                                LiftoffRegister src1,
                                                LiftoffRegister src2) {
  I16x8Q15MulRSatS(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
}
|
||||
|
||||
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
|
||||
|
Loading…
Reference in New Issue
Block a user