[wasm-simd][liftoff][ia32] Implement saturating rounding multiply high

Create a macro-assembler helper function to enable code sharing between
Liftoff and TurboFan.

Bug: v8:10971
Change-Id: I8d8132f4cf3386b28cdf5350fde2e076428d68c6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2621860
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72067}
This commit is contained in:
Zhi An Ng 2021-01-13 07:10:44 +00:00 committed by Commit Bot
parent bbcb20a13e
commit e94084f6af
4 changed files with 33 additions and 12 deletions

View File

@ -650,6 +650,20 @@ void TurboAssembler::Roundpd(XMMRegister dst, XMMRegister src,
}
}
// Emits a packed 16-bit "multiply high with round and scale" of |src1| and
// |src2| into |dst|, selecting the AVX or SSSE3 encoding based on CPU support.
//
// AVX has a non-destructive three-operand form, so any register assignment
// works. The SSSE3 form is destructive (dst = dst * operand), so |dst| has to
// hold one of the two inputs before the multiply is emitted.
void TurboAssembler::Pmulhrsw(XMMRegister dst, XMMRegister src1,
                              XMMRegister src2) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vpmulhrsw(dst, src1, src2);
    return;
  }

  if (dst != src1) {
    if (dst != src2) {
      // dst is unrelated to both inputs: seed it with src1.
      movdqu(dst, src1);
    } else {
      // dst aliases src2. Copying src1 into dst would destroy src2 before it
      // is read. The multiply is commutative, so keep dst (== src2) as the
      // destructive operand and multiply by src1 instead.
      src2 = src1;
    }
  }
  CpuFeatureScope sse_scope(this, SSSE3);
  pmulhrsw(dst, src2);
}
// 1. Unpack src0, src1 into even-number elements of scratch.
// 2. Unpack src1, src0 into even-number elements of dst.
// 3. Multiply 1. with 2.
@ -763,6 +777,17 @@ void TurboAssembler::I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src,
}
}
// Emits the lowering for a saturating, rounding Q15 multiply of the eight
// 16-bit lanes of |src1| and |src2|, leaving the result in |dst|.
//
// |scratch| is clobbered. It is written before the inputs are read and is
// still live after |dst| has been produced, so it must not alias |dst|,
// |src1| or |src2|.
void TurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1,
                                      XMMRegister src2, XMMRegister scratch) {
  DCHECK(scratch != dst);
  DCHECK(scratch != src1);
  DCHECK(scratch != src2);

  // k = i16x8.splat(0x8000): all-ones, then shift each word left by 15.
  Pcmpeqd(scratch, scratch);
  Psllw(scratch, scratch, byte{15});

  // The rounding multiply does not saturate. A lane that should have
  // saturated shows up as 0x8000 in the product.
  Pmulhrsw(dst, src1, src2);

  // Build an all-ones mask in every lane equal to 0x8000, then xor it in to
  // flip those lanes to 0x7FFF. All other lanes are xor'ed with zero.
  Pcmpeqw(scratch, dst);
  Pxor(dst, scratch);
}
void TurboAssembler::ShlPair(Register high, Register low, uint8_t shift) {
DCHECK_GE(63, shift);
if (shift >= 32) {

View File

@ -527,7 +527,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP3_XO_SSE4(Pmaxsd, pmaxsd)
AVX_OP3_WITH_TYPE_SCOPE(Pmaddubsw, pmaddubsw, XMMRegister, XMMRegister, SSSE3)
AVX_OP3_WITH_TYPE_SCOPE(Pmulhrsw, pmulhrsw, XMMRegister, XMMRegister, SSSE3)
#undef AVX_OP3_XO_SSE4
#undef AVX_OP3_WITH_TYPE_SCOPE
@ -612,6 +611,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
void Roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
// Packed 16-bit rounding multiply-high of src1 and src2 into dst. Handles SSE
// and AVX. The AVX encoding takes all three registers directly. The SSE
// encoding only takes two operands and overwrites its first one, so on SSE
// dst is loaded from src1 when needed before the multiply is emitted.
void Pmulhrsw(XMMRegister dst, XMMRegister src1, XMMRegister src2);
// These Wasm SIMD ops do not have direct lowerings on IA32. These
// helpers are optimized to produce the fastest and smallest codegen.
// Defined here to allow usage on both TurboFan and Liftoff.
@ -628,6 +630,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src);
void I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src,
XMMRegister scratch);
// Lowers the Wasm SIMD i16x8 Q15 saturating rounding multiply of src1 and
// src2 into dst. scratch is overwritten by the emitted sequence and is used
// while the inputs and the result are live, so callers should pass a register
// distinct from dst, src1 and src2.
void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scratch);
void Push(Register src) { push(src); }
void Push(Operand src) { push(src); }

View File

@ -2358,16 +2358,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32I16x8Q15MulRSatS: {
  // The instruction sequence lives in the macro-assembler helper so that
  // TurboFan and Liftoff share a single lowering. Emit it exactly once here;
  // an additional inline copy would duplicate the code and would read an
  // already-overwritten input whenever the output aliases one of the inputs.
  __ I16x8Q15MulRSatS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                      i.InputSimd128Register(1), kScratchDoubleReg);
  break;
}
case kIA32I32x4SignSelect: {

View File

@ -3577,7 +3577,7 @@ void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
// Liftoff lowering of i16x8.q15mulr_sat_s: forwards to the shared
// macro-assembler helper, using Liftoff's scratch XMM register as the
// temporary. The op is implemented here, so no bailout is reported.
void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
                                                LiftoffRegister src1,
                                                LiftoffRegister src2) {
  I16x8Q15MulRSatS(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
}
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,