S390 [liftoff]: Implement simd extend multiply

Implementations are added to the macro-assembler so they can be
shared between Liftoff and the TurboFan code generator.
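
For context: the wasm extended-multiply (extmul) operations widen each input
lane before multiplying, so the products cannot wrap. A minimal scalar sketch
of the semantics (illustrative C++ only, not V8 code; the helper name is made
up for this sketch):

#include <cstdint>
#include <cstdio>

// Scalar model of i32x4.extmul_low_i16x8_s: take the four low i16 lanes of
// each operand, sign-extend them to i32, and multiply pairwise.
void I32x4ExtMulLowI16x8S(const int16_t a[8], const int16_t b[8],
                          int32_t out[4]) {
  for (int i = 0; i < 4; ++i) {
    out[i] = static_cast<int32_t>(a[i]) * static_cast<int32_t>(b[i]);
  }
}

int main() {
  int16_t a[8] = {0x7FFF, 2, 3, 4, 5, 6, 7, 8};
  int16_t b[8] = {0x7FFF, 2, 3, 4, 5, 6, 7, 8};
  int32_t out[4];
  I32x4ExtMulLowI16x8S(a, b, out);
  printf("%d\n", out[0]);  // 1073676289; a plain 16-bit multiply would wrap.
  return 0;
}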

Change-Id: I0d1c9e8bcd2dfd89b5ed4a273821766763565f54
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3417438
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#78790}
Milad Fa, 2022-01-26 12:17:48 -05:00, committed by V8 LUCI CQ
parent d484d44a34
commit 2f0ae10e33
4 changed files with 95 additions and 131 deletions

@@ -5368,6 +5368,32 @@ SIMD_SHIFT_LIST(EMIT_SIMD_SHIFT)
#undef EMIT_SIMD_SHIFT
#undef SIMD_SHIFT_LIST
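// Columns: (op name, multiply-even instr, multiply-odd instr, merge instr,
// source element-size code: 0 = byte, 1 = halfword, 2 = word).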
#define SIMD_EXT_MUL_LIST(V)                    \
  V(I64x2ExtMulLowI32x4S, vme, vmo, vmrl, 2)    \
  V(I64x2ExtMulHighI32x4S, vme, vmo, vmrh, 2)   \
  V(I64x2ExtMulLowI32x4U, vmle, vmlo, vmrl, 2)  \
  V(I64x2ExtMulHighI32x4U, vmle, vmlo, vmrh, 2) \
  V(I32x4ExtMulLowI16x8S, vme, vmo, vmrl, 1)    \
  V(I32x4ExtMulHighI16x8S, vme, vmo, vmrh, 1)   \
  V(I32x4ExtMulLowI16x8U, vmle, vmlo, vmrl, 1)  \
  V(I32x4ExtMulHighI16x8U, vmle, vmlo, vmrh, 1) \
  V(I16x8ExtMulLowI8x16S, vme, vmo, vmrl, 0)    \
  V(I16x8ExtMulHighI8x16S, vme, vmo, vmrh, 0)   \
  V(I16x8ExtMulLowI8x16U, vmle, vmlo, vmrl, 0)  \
  V(I16x8ExtMulHighI8x16U, vmle, vmlo, vmrh, 0)
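// Strategy: multiply even-numbered lanes into scratch and odd-numbered lanes
// into dst (each product is double-width), then merge the high or low halves
// of the two results at the widened element size (mode + 1).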
#define EMIT_SIMD_EXT_MUL(name, mul_even, mul_odd, merge, mode)              \
  void TurboAssembler::name(Simd128Register dst, Simd128Register src1,       \
                            Simd128Register src2, Simd128Register scratch) { \
    mul_even(scratch, src1, src2, Condition(0), Condition(0),                \
             Condition(mode));                                               \
    mul_odd(dst, src1, src2, Condition(0), Condition(0), Condition(mode));   \
    merge(dst, scratch, dst, Condition(0), Condition(0),                     \
          Condition(mode + 1));                                              \
  }
SIMD_EXT_MUL_LIST(EMIT_SIMD_EXT_MUL)
#undef EMIT_SIMD_EXT_MUL
#undef SIMD_EXT_MUL_LIST
void TurboAssembler::I64x2Mul(Simd128Register dst, Simd128Register src1,
                              Simd128Register src2) {
  Register scratch_1 = r0;
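
To see why the merge step selects the low or the high half of the result,
here is a scalar model of the three-instruction sequence for the i64x2 case
(an illustrative sketch only, not V8 code; element 0 is the leftmost, most
significant position, as in z/Architecture vector registers):

#include <cstdint>

// vme/vmo model: double-width products of the even-/odd-indexed elements.
void MulEven(const int32_t a[4], const int32_t b[4], int64_t out[2]) {
  out[0] = static_cast<int64_t>(a[0]) * b[0];
  out[1] = static_cast<int64_t>(a[2]) * b[2];
}
void MulOdd(const int32_t a[4], const int32_t b[4], int64_t out[2]) {
  out[0] = static_cast<int64_t>(a[1]) * b[1];
  out[1] = static_cast<int64_t>(a[3]) * b[3];
}
// vmrh/vmrl model at doubleword element size: take the first (high) or the
// second (low) element of each input, even products first.
void MergeHigh(const int64_t even[2], const int64_t odd[2], int64_t out[2]) {
  out[0] = even[0];  // a[0] * b[0]
  out[1] = odd[0];   // a[1] * b[1]
}
void MergeLow(const int64_t even[2], const int64_t odd[2], int64_t out[2]) {
  out[0] = even[1];  // a[2] * b[2]
  out[1] = odd[1];   // a[3] * b[3]
}

int main() {
  int32_t a[4] = {1, 2, 3, 4}, b[4] = {10, 20, 30, 40};
  int64_t even[2], odd[2], high[2], low[2];
  MulEven(a, b, even);         // {1*10, 3*30} = {10, 90}
  MulOdd(a, b, odd);           // {2*20, 4*40} = {40, 160}
  MergeHigh(even, odd, high);  // {10, 40}:  products of elements 0 and 1
  MergeLow(even, odd, low);    // {90, 160}: products of elements 2 and 3
  return 0;
}

How these halves map onto wasm's low/high lane numbering then follows from
the big-endian element order sketched above.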

@@ -1250,6 +1250,27 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
#undef PROTOTYPE_SIMD_BINOP
#undef SIMD_BINOP_LIST
#define SIMD_EXT_MUL_LIST(V) \
  V(I64x2ExtMulLowI32x4S)    \
  V(I64x2ExtMulHighI32x4S)   \
  V(I64x2ExtMulLowI32x4U)    \
  V(I64x2ExtMulHighI32x4U)   \
  V(I32x4ExtMulLowI16x8S)    \
  V(I32x4ExtMulHighI16x8S)   \
  V(I32x4ExtMulLowI16x8U)    \
  V(I32x4ExtMulHighI16x8U)   \
  V(I16x8ExtMulLowI8x16S)    \
  V(I16x8ExtMulHighI8x16S)   \
  V(I16x8ExtMulLowI8x16U)    \
  V(I16x8ExtMulHighI8x16U)
#define PROTOTYPE_SIMD_EXT_MUL(name)                                         \
  void name(Simd128Register dst, Simd128Register src1, Simd128Register src2, \
            Simd128Register scratch);
SIMD_EXT_MUL_LIST(PROTOTYPE_SIMD_EXT_MUL)
#undef PROTOTYPE_SIMD_EXT_MUL
#undef SIMD_EXT_MUL_LIST
// ---------------------------------------------------------------------------
// Pointer compression Support

@@ -2727,6 +2727,31 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_REPLACE_LANE_LIST(EMIT_SIMD_REPLACE_LANE)
#undef EMIT_SIMD_REPLACE_LANE
#undef SIMD_REPLACE_LANE_LIST
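// Each case now forwards to the shared TurboAssembler implementation.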
#define SIMD_EXT_MUL_LIST(V) \
  V(I64x2ExtMulLowI32x4S)    \
  V(I64x2ExtMulHighI32x4S)   \
  V(I64x2ExtMulLowI32x4U)    \
  V(I64x2ExtMulHighI32x4U)   \
  V(I32x4ExtMulLowI16x8S)    \
  V(I32x4ExtMulHighI16x8S)   \
  V(I32x4ExtMulLowI16x8U)    \
  V(I32x4ExtMulHighI16x8U)   \
  V(I16x8ExtMulLowI8x16S)    \
  V(I16x8ExtMulHighI8x16S)   \
  V(I16x8ExtMulLowI8x16U)    \
  V(I16x8ExtMulHighI8x16U)
#define EMIT_SIMD_EXT_MUL(name)                                   \
  case kS390_##name: {                                            \
    __ name(i.OutputSimd128Register(), i.InputSimd128Register(0), \
            i.InputSimd128Register(1), kScratchDoubleReg);        \
    break;                                                        \
  }
SIMD_EXT_MUL_LIST(EMIT_SIMD_EXT_MUL)
#undef EMIT_SIMD_EXT_MUL
#undef SIMD_EXT_MUL_LIST
    // vector binops
    case kS390_F64x2Qfma: {
      Simd128Register src0 = i.InputSimd128Register(0);
@@ -3166,65 +3191,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                  Condition(0), Condition(0), Condition(2));
      break;
    }
#define EXT_MUL(mul_even, mul_odd, merge, mode)                             \
  Simd128Register dst = i.OutputSimd128Register(),                          \
                  src0 = i.InputSimd128Register(0),                         \
                  src1 = i.InputSimd128Register(1);                         \
  __ mul_even(kScratchDoubleReg, src0, src1, Condition(0), Condition(0),    \
              Condition(mode));                                             \
  __ mul_odd(dst, src0, src1, Condition(0), Condition(0), Condition(mode)); \
  __ merge(dst, kScratchDoubleReg, dst, Condition(0), Condition(0),         \
           Condition(mode + 1));
    case kS390_I64x2ExtMulLowI32x4S: {
      EXT_MUL(vme, vmo, vmrl, 2)
      break;
    }
    case kS390_I64x2ExtMulHighI32x4S: {
      EXT_MUL(vme, vmo, vmrh, 2)
      break;
    }
    case kS390_I64x2ExtMulLowI32x4U: {
      EXT_MUL(vmle, vmlo, vmrl, 2)
      break;
    }
    case kS390_I64x2ExtMulHighI32x4U: {
      EXT_MUL(vmle, vmlo, vmrh, 2)
      break;
    }
    case kS390_I32x4ExtMulLowI16x8S: {
      EXT_MUL(vme, vmo, vmrl, 1)
      break;
    }
    case kS390_I32x4ExtMulHighI16x8S: {
      EXT_MUL(vme, vmo, vmrh, 1)
      break;
    }
    case kS390_I32x4ExtMulLowI16x8U: {
      EXT_MUL(vmle, vmlo, vmrl, 1)
      break;
    }
    case kS390_I32x4ExtMulHighI16x8U: {
      EXT_MUL(vmle, vmlo, vmrh, 1)
      break;
    }
    case kS390_I16x8ExtMulLowI8x16S: {
      EXT_MUL(vme, vmo, vmrl, 0)
      break;
    }
    case kS390_I16x8ExtMulHighI8x16S: {
      EXT_MUL(vme, vmo, vmrh, 0)
      break;
    }
    case kS390_I16x8ExtMulLowI8x16U: {
      EXT_MUL(vmle, vmlo, vmrl, 0)
      break;
    }
    case kS390_I16x8ExtMulHighI8x16U: {
      EXT_MUL(vmle, vmlo, vmrh, 0)
      break;
    }
#undef EXT_MUL
#define EXT_ADD_PAIRWISE(lane_size, mul_even, mul_odd) \
  Simd128Register src = i.InputSimd128Register(0);     \
  Simd128Register dst = i.OutputSimd128Register();     \

@@ -2462,6 +2462,29 @@ SIMD_REPLACE_LANE_LIST(EMIT_SIMD_REPLACE_LANE)
#undef EMIT_SIMD_REPLACE_LANE
#undef SIMD_REPLACE_LANE_LIST
#define SIMD_EXT_MUL_LIST(V)                          \
  V(i64x2_extmul_low_i32x4_s, I64x2ExtMulLowI32x4S)   \
  V(i64x2_extmul_low_i32x4_u, I64x2ExtMulLowI32x4U)   \
  V(i64x2_extmul_high_i32x4_s, I64x2ExtMulHighI32x4S) \
  V(i64x2_extmul_high_i32x4_u, I64x2ExtMulHighI32x4U) \
  V(i32x4_extmul_low_i16x8_s, I32x4ExtMulLowI16x8S)   \
  V(i32x4_extmul_low_i16x8_u, I32x4ExtMulLowI16x8U)   \
  V(i32x4_extmul_high_i16x8_s, I32x4ExtMulHighI16x8S) \
  V(i32x4_extmul_high_i16x8_u, I32x4ExtMulHighI16x8U) \
  V(i16x8_extmul_low_i8x16_s, I16x8ExtMulLowI8x16S)   \
  V(i16x8_extmul_low_i8x16_u, I16x8ExtMulLowI8x16U)   \
  V(i16x8_extmul_high_i8x16_s, I16x8ExtMulHighI8x16S) \
  V(i16x8_extmul_high_i8x16_u, I16x8ExtMulHighI8x16U)
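// Each emit_* body forwards to the shared macro-assembler implementation,
// replacing the per-instruction bailouts deleted further down in this file.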
#define EMIT_SIMD_EXT_MUL(name, op)                                      \
  void LiftoffAssembler::emit_##name(                                    \
      LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2) { \
    op(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);               \
  }
SIMD_EXT_MUL_LIST(EMIT_SIMD_EXT_MUL)
#undef EMIT_SIMD_EXT_MUL
#undef SIMD_EXT_MUL_LIST
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
                                     Register offset_reg, uintptr_t offset_imm,
                                     LoadType type,
@@ -2510,24 +2533,6 @@ void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
bailout(kSimd, "i64x2_alltrue");
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_u unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
LiftoffRegister src) {
I64x2BitMask(dst.gp(), src.fp(), r0, kScratchDoubleReg);
@@ -2553,12 +2558,6 @@ void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
bailout(kSimd, "i64x2_uconvert_i32x4_high");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_u unsupported");
}
void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i32x4_alltrue");
@@ -2585,30 +2584,6 @@ void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
bailout(kSimd, "i32x4.extadd_pairwise_i16x8_u");
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i16x8_alltrue");
@@ -2653,36 +2628,12 @@ void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
bailout(kSimd, "i16x8.extadd_pairwise_i8x16_u");
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_high_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8_q15mulr_sat_s");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8_extmul_high_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,