S390 [liftoff]: Implement simd integer dot product

Change-Id: I809ebfb3e7c11a7cf61873043abae85dc069ed66
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3464914
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#79108}
This commit is contained in:
Milad Fa 2022-02-15 12:10:33 -05:00 committed by V8 LUCI CQ
parent 8bad451601
commit 0ac7e1203f
5 changed files with 14 additions and 10 deletions

View File

@ -6007,6 +6007,14 @@ void TurboAssembler::I8x16Shuffle(Simd128Register dst, Simd128Register src1,
vperm(dst, src1, src2, scratch3, Condition(0), Condition(0));
}
// Emits the three-instruction sequence backing I32x4DotI16x8S:
//   1. vme(src1, src2) -> |scratch|
//   2. vmo(src1, src2) -> |dst|
//   3. va(|scratch|, |dst|) -> |dst|
// |scratch| is overwritten and holds no meaningful value for the caller
// afterwards.
// NOTE(review): vme/vmo/va are presumably the z/Architecture "vector multiply
// even", "vector multiply odd" and "vector add" instructions, with the last
// mask operand selecting the element size — confirm against the ISA manual.
void TurboAssembler::I32x4DotI16x8S(Simd128Register dst, Simd128Register src1,
                                    Simd128Register src2,
                                    Simd128Register scratch) {
  // The first two mask operands are zero for every instruction emitted here;
  // only the trailing operand differs between the multiplies and the add.
  const Condition zero_mask = Condition(0);
  const Condition multiply_mask = Condition(1);
  const Condition add_mask = Condition(2);

  // Keep this emission order: |scratch| is produced first, and |dst| is only
  // written by the second multiply once both sources have been consumed by
  // the first one.
  vme(scratch, src1, src2, zero_mask, zero_mask, multiply_mask);
  vmo(dst, src1, src2, zero_mask, zero_mask, multiply_mask);

  // Combine the two partial results into the destination.
  va(dst, scratch, dst, zero_mask, zero_mask, add_mask);
}
// Vector LE Load and Transform instructions.
#ifdef V8_TARGET_BIG_ENDIAN
#define IS_BIG_ENDIAN true

View File

@ -1153,6 +1153,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
Simd128Register src2, uint64_t high, uint64_t low,
Register scratch1, Register scratch2,
Simd128Register scratch3);
// Emits the I32x4DotI16x8S instruction sequence: the result computed from
// |src1| and |src2| is written to |dst|. |scratch| is used as a temporary and
// is overwritten.
void I32x4DotI16x8S(Simd128Register dst, Simd128Register src1,
Simd128Register src2, Simd128Register scratch);
void S128Select(Simd128Register dst, Simd128Register src1,
Simd128Register src2, Simd128Register mask);

View File

@ -2971,14 +2971,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kS390_I32x4DotI16x8S: {
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
__ vme(kScratchDoubleReg, i.InputSimd128Register(0),
i.InputSimd128Register(1), Condition(0), Condition(0),
Condition(1));
__ vmo(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(1),
Condition(0), Condition(0), Condition(1));
__ va(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
Condition(0), Condition(0), Condition(2));
__ I32x4DotI16x8S(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), kScratchDoubleReg);
break;
}
#define Q15_MUL_ROAUND(accumulator, unpack) \

View File

@ -2470,6 +2470,7 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(I32x4Shl) \
V(I32x4ShrS) \
V(I32x4ShrU) \
V(I32x4DotI16x8S) \
V(I16x8Add) \
V(I16x8Sub) \
V(I16x8Mul) \
@ -2517,7 +2518,6 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(S128AndNot)
#define SIMD_BINOP_UNIQUE_REGISTER_LIST(V) \
V(I32x4DotI16x8S) \
V(I16x8AddSatS) \
V(I16x8SubSatS) \
V(I16x8AddSatU) \

View File

@ -2628,7 +2628,7 @@ void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
// Liftoff hook for the i32x4_dot_i16x8_s operation.
// NOTE(review): this excerpt appears to be a rendered diff with the +/-
// markers stripped; the bailout(...) line is presumably the removed line and
// the I32x4DotI16x8S(...) call its replacement — confirm against the real
// file before assuming both statements coexist.
void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i32x4_dot_i16x8_s");
// Forwards the .fp() registers of dst/lhs/rhs to the shared assembler helper,
// passing kScratchDoubleReg as its temporary register.
I32x4DotI16x8S(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}
void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,