PPC[liftoff]: Implement simd fp qfma ops

Change-Id: I4faac2355eb6d84a33674fd47bb2f728ace2ccb9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4086423
Reviewed-by: Vasili Skurydzin <vasili.skurydzin@ibm.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#84734}
This commit is contained in:
Milad Fa 2022-12-07 16:53:07 -05:00 committed by V8 LUCI CQ
parent c2a1261355
commit 73801dac2f
4 changed files with 77 additions and 68 deletions

View File

@ -4514,6 +4514,38 @@ void TurboAssembler::I16x8Q15MulRSatS(Simd128Register dst, Simd128Register src1,
vmhraddshs(dst, src1, src2, scratch);
}
// Emits the instruction sequence for the F64x2 qfma operation.
//
// src2 is first copied into |scratch|, xvmaddmdp is then issued with
// |scratch| as its target operand and src3/src1 as its two sources, and the
// result is finally copied from |scratch| into |dst|. None of the source
// registers is written, and |dst| is written only by the last instruction.
//
// NOTE(review): per the Power ISA the M-form xvmaddmdp computes
// XT = XA * XT + XB, which makes this dst = src1 + src2 * src3 per lane --
// confirm against the ISA description.
void TurboAssembler::F64x2Qfma(Simd128Register dst, Simd128Register src1,
                               Simd128Register src2, Simd128Register src3,
                               Simd128Register scratch) {
  // The instruction overwrites its target register, so accumulate in the
  // scratch register rather than in one of the inputs.
  const Simd128Register acc = scratch;
  vor(acc, src2, src2);
  xvmaddmdp(acc, src3, src1);
  vor(dst, acc, acc);
}
// Emits the instruction sequence for the F64x2 qfms operation.
//
// src2 is first copied into |scratch|, xvnmsubmdp is then issued with
// |scratch| as its target operand and src3/src1 as its two sources, and the
// result is finally copied from |scratch| into |dst|. None of the source
// registers is written, and |dst| is written only by the last instruction.
//
// NOTE(review): per the Power ISA the M-form xvnmsubmdp computes
// XT = -(XA * XT - XB), which makes this dst = src1 - src2 * src3 per lane --
// confirm against the ISA description.
void TurboAssembler::F64x2Qfms(Simd128Register dst, Simd128Register src1,
                               Simd128Register src2, Simd128Register src3,
                               Simd128Register scratch) {
  // The instruction overwrites its target register, so accumulate in the
  // scratch register rather than in one of the inputs.
  const Simd128Register acc = scratch;
  vor(acc, src2, src2);
  xvnmsubmdp(acc, src3, src1);
  vor(dst, acc, acc);
}
// Emits the instruction sequence for the F32x4 qfma operation.
//
// src2 is first copied into |scratch|, xvmaddmsp is then issued with
// |scratch| as its target operand and src3/src1 as its two sources, and the
// result is finally copied from |scratch| into |dst|. None of the source
// registers is written, and |dst| is written only by the last instruction.
//
// NOTE(review): per the Power ISA the M-form xvmaddmsp computes
// XT = XA * XT + XB, which makes this dst = src1 + src2 * src3 per lane --
// confirm against the ISA description.
void TurboAssembler::F32x4Qfma(Simd128Register dst, Simd128Register src1,
                               Simd128Register src2, Simd128Register src3,
                               Simd128Register scratch) {
  // The instruction overwrites its target register, so accumulate in the
  // scratch register rather than in one of the inputs.
  const Simd128Register acc = scratch;
  vor(acc, src2, src2);
  xvmaddmsp(acc, src3, src1);
  vor(dst, acc, acc);
}
// Emits the instruction sequence for the F32x4 qfms operation.
//
// src2 is first copied into |scratch|, xvnmsubmsp is then issued with
// |scratch| as its target operand and src3/src1 as its two sources, and the
// result is finally copied from |scratch| into |dst|. None of the source
// registers is written, and |dst| is written only by the last instruction.
//
// NOTE(review): per the Power ISA the M-form xvnmsubmsp computes
// XT = -(XA * XT - XB), which makes this dst = src1 - src2 * src3 per lane --
// confirm against the ISA description.
void TurboAssembler::F32x4Qfms(Simd128Register dst, Simd128Register src1,
                               Simd128Register src2, Simd128Register src3,
                               Simd128Register scratch) {
  // The instruction overwrites its target register, so accumulate in the
  // scratch register rather than in one of the inputs.
  const Simd128Register acc = scratch;
  vor(acc, src2, src2);
  xvnmsubmsp(acc, src3, src1);
  vor(dst, acc, acc);
}
void TurboAssembler::V128AnyTrue(Register dst, Simd128Register src,
Register scratch1, Register scratch2,
Simd128Register scratch3) {

View File

@ -1292,6 +1292,18 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
#undef PROTOTYPE_SIMD_ALL_TRUE
#undef SIMD_ALL_TRUE_LIST
// Declarations of the SIMD qfma/qfms member functions.
// SIMD_QFM_LIST enumerates the four function names (F64x2 and F32x4, each in
// a Qfma and a Qfms flavour) and applies the macro V to every one of them.
#define SIMD_QFM_LIST(V) \
V(F64x2Qfma) \
V(F64x2Qfms) \
V(F32x4Qfma) \
V(F32x4Qfms)
// PROTOTYPE_SIMD_QFM(name) declares `void name(dst, src1, src2, src3,
// scratch)`, with every parameter a Simd128Register. Expanding the list with
// it yields one prototype per entry; both helper macros are undefined again
// right afterwards so they do not leak beyond this point.
#define PROTOTYPE_SIMD_QFM(name) \
void name(Simd128Register dst, Simd128Register src1, Simd128Register src2, \
Simd128Register src3, Simd128Register scratch);
SIMD_QFM_LIST(PROTOTYPE_SIMD_QFM)
#undef PROTOTYPE_SIMD_QFM
#undef SIMD_QFM_LIST
void LoadSimd128(Simd128Register dst, const MemOperand& mem,
Register scratch);
void StoreSimd128(Simd128Register src, const MemOperand& mem,

View File

@ -2419,6 +2419,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
#undef EMIT_SIMD_ALL_TRUE
#undef SIMD_ALL_TRUE_LIST
// Switch cases for the SIMD qfma/qfms opcodes.
// SIMD_QFM_LIST enumerates the four operation names and applies the macro V
// to every one of them.
#define SIMD_QFM_LIST(V) \
V(F64x2Qfma) \
V(F64x2Qfms) \
V(F32x4Qfma) \
V(F32x4Qfms)
// EMIT_SIMD_QFM(name) expands to a `case kPPC_<name>` that invokes
// `__ <name>(...)` with the instruction's SIMD output register, its SIMD
// inputs 0, 1 and 2 (in that order) and kScratchSimd128Reg as the scratch
// register, and then breaks out of the switch. Both helper macros are
// undefined again once the list has been expanded.
#define EMIT_SIMD_QFM(name) \
case kPPC_##name: { \
__ name(i.OutputSimd128Register(), i.InputSimd128Register(0), \
i.InputSimd128Register(1), i.InputSimd128Register(2), \
kScratchSimd128Reg); \
break; \
}
SIMD_QFM_LIST(EMIT_SIMD_QFM)
#undef EMIT_SIMD_QFM
#undef SIMD_QFM_LIST
case kPPC_F64x2Splat: {
__ F64x2Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0),
kScratchReg);
@ -2642,46 +2658,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vperm(dst, dst, kSimd128RegZero, kScratchSimd128Reg);
break;
}
case kPPC_F64x2Qfma: {
  // Delegate to the TurboAssembler helper of the same name instead of
  // repeating its vor / xvmaddmdp / vor sequence inline. The helper receives
  // exactly the operands the inline code used: inputs 0, 1 and 2 as
  // src1/src2/src3, the output register as dst, and kScratchSimd128Reg as
  // the scratch register, so the emitted instructions are unchanged.
  __ F64x2Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1), i.InputSimd128Register(2),
               kScratchSimd128Reg);
  break;
}
case kPPC_F64x2Qfms: {
  // Delegate to the TurboAssembler helper of the same name instead of
  // repeating its vor / xvnmsubmdp / vor sequence inline. The helper receives
  // exactly the operands the inline code used: inputs 0, 1 and 2 as
  // src1/src2/src3, the output register as dst, and kScratchSimd128Reg as
  // the scratch register, so the emitted instructions are unchanged.
  __ F64x2Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1), i.InputSimd128Register(2),
               kScratchSimd128Reg);
  break;
}
case kPPC_F32x4Qfma: {
  // Delegate to the TurboAssembler helper of the same name instead of
  // repeating its vor / xvmaddmsp / vor sequence inline. The helper receives
  // exactly the operands the inline code used: inputs 0, 1 and 2 as
  // src1/src2/src3, the output register as dst, and kScratchSimd128Reg as
  // the scratch register, so the emitted instructions are unchanged.
  __ F32x4Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1), i.InputSimd128Register(2),
               kScratchSimd128Reg);
  break;
}
case kPPC_F32x4Qfms: {
  // Delegate to the TurboAssembler helper of the same name instead of
  // repeating its vor / xvnmsubmsp / vor sequence inline. The helper receives
  // exactly the operands the inline code used: inputs 0, 1 and 2 as
  // src1/src2/src3, the output register as dst, and kScratchSimd128Reg as
  // the scratch register, so the emitted instructions are unchanged.
  __ F32x4Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1), i.InputSimd128Register(2),
               kScratchSimd128Reg);
  break;
}
case kPPC_I64x2BitMask: {
__ I64x2BitMask(i.OutputRegister(), i.InputSimd128Register(0),
kScratchReg, kScratchSimd128Reg);

View File

@ -2038,6 +2038,23 @@ SIMD_ALL_TRUE_LIST(EMIT_SIMD_ALL_TRUE)
#undef EMIT_SIMD_ALL_TRUE
#undef SIMD_ALL_TRUE_LIST
// Liftoff entry points for the SIMD qfma/qfms operations.
// SIMD_QFM_LIST pairs each emit_* suffix (first column) with the name of the
// function that is called to emit it (second column) and applies the macro V
// to every pair.
#define SIMD_QFM_LIST(V) \
V(f64x2_qfma, F64x2Qfma) \
V(f64x2_qfms, F64x2Qfms) \
V(f32x4_qfma, F32x4Qfma) \
V(f32x4_qfms, F32x4Qfms)
// EMIT_SIMD_QFM(name, op) defines LiftoffAssembler::emit_<name>(dst, src1,
// src2, src3). The body converts each LiftoffRegister with .fp().toSimd()
// and forwards the four results, in the same order, to `op`, passing
// kScratchSimd128Reg as the scratch register. Both helper macros are
// undefined again once the list has been expanded.
#define EMIT_SIMD_QFM(name, op) \
void LiftoffAssembler::emit_##name( \
LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, \
LiftoffRegister src3) { \
op(dst.fp().toSimd(), src1.fp().toSimd(), src2.fp().toSimd(), \
src3.fp().toSimd(), kScratchSimd128Reg); \
}
SIMD_QFM_LIST(EMIT_SIMD_QFM)
#undef EMIT_SIMD_QFM
#undef SIMD_QFM_LIST
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
F64x2Splat(dst.fp().toSimd(), src.fp(), r0);
@ -2450,34 +2467,6 @@ void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
bailout(kSimd, "i32x4.trunc_sat_f64x2_u_zero");
}
// Stub for f32x4 qfma: the body only calls bailout() with the reason
// kRelaxedSimd and the detail string "emit_f32x4_qfma". None of the register
// arguments is used.
void LiftoffAssembler::emit_f32x4_qfma(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister src3) {
bailout(kRelaxedSimd, "emit_f32x4_qfma");
}
// Stub for f32x4 qfms: the body only calls bailout() with the reason
// kRelaxedSimd and the detail string "emit_f32x4_qfms". None of the register
// arguments is used.
void LiftoffAssembler::emit_f32x4_qfms(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister src3) {
bailout(kRelaxedSimd, "emit_f32x4_qfms");
}
// Stub for f64x2 qfma: the body only calls bailout() with the reason
// kRelaxedSimd and the detail string "emit_f64x2_qfma". None of the register
// arguments is used.
void LiftoffAssembler::emit_f64x2_qfma(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister src3) {
bailout(kRelaxedSimd, "emit_f64x2_qfma");
}
// Stub for f64x2 qfms: the body only calls bailout() with the reason
// kRelaxedSimd and the detail string "emit_f64x2_qfms". None of the register
// arguments is used.
void LiftoffAssembler::emit_f64x2_qfms(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister src3) {
bailout(kRelaxedSimd, "emit_f64x2_qfms");
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
LoadU64(limit_address, MemOperand(limit_address), r0);
CmpU64(sp, limit_address);