S390 [liftoff]: Implement simd extend add pairwise

Change-Id: I346ff7d125027caeb14cbfead74eba0bd30c6f2d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3450900
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#79018}
This commit is contained in:
Milad Fa 2022-02-09 13:56:03 -05:00 committed by V8 LUCI CQ
parent aae45ca822
commit ca443726db
4 changed files with 95 additions and 54 deletions

View File

@ -5910,6 +5910,46 @@ void TurboAssembler::F32x4DemoteF64x2Zero(Simd128Register dst,
vlvg(dst, scratch4, MemOperand(r0, 3), Condition(2));
}
// Emits a pairwise extending add: adjacent lanes of |src| (element size code
// |lane_size|) are combined into lanes of the next larger element size in
// |dst|.
//   1. |scratch2| is filled with the immediate 1 in every lane.
//   2. |mul_even| / |mul_odd| multiply the even / odd lanes of |src| by that
//      vector, leaving the products in |scratch1| / |scratch2|.
//   3. The two product vectors are added using element size |lane_size| + 1.
// |src| is read after |scratch2| and again after |scratch1| has been written,
// and |scratch2| is read after |scratch1| has been written, so these three
// registers must be pairwise distinct. |dst| is only written by the final add
// and may alias any of them.
#define EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, lane_size, mul_even, \
                         mul_odd)                                           \
  CHECK_NE(src, scratch1);                                                  \
  CHECK_NE(src, scratch2);                                                  \
  CHECK_NE(scratch1, scratch2);                                             \
  vrepi(scratch2, Operand(1), Condition(lane_size));                        \
  mul_even(scratch1, src, scratch2, Condition(0), Condition(0),             \
           Condition(lane_size));                                           \
  mul_odd(scratch2, src, scratch2, Condition(0), Condition(0),              \
          Condition(lane_size));                                            \
  va(dst, scratch1, scratch2, Condition(0), Condition(0),                   \
     Condition((lane_size) + 1));
// Signed pairwise extending add producing I32x4 from I16x8.
// Expands EXT_ADD_PAIRWISE with lane-size code 1 and the vme / vmo multiplies.
// Both scratch registers are overwritten.
void TurboAssembler::I32x4ExtAddPairwiseI16x8S(
    Simd128Register dst, Simd128Register src, Simd128Register scratch1,
    Simd128Register scratch2) {
  EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, 1, vme, vmo)
}
// Unsigned pairwise extending add producing I32x4 from I16x8.
// |scratch1| is cleared by XOR-ing it with itself and then passed to vsum as
// its third operand, together with |src| as the second; the result is written
// to |dst|.
// |src| must not be |scratch1|, because |scratch1| is cleared before |src| is
// read. |scratch2| is not used; it is part of the signature only so that this
// helper matches the other ExtAddPairwise helpers.
void TurboAssembler::I32x4ExtAddPairwiseI16x8U(Simd128Register dst,
                                               Simd128Register src,
                                               Simd128Register scratch1,
                                               Simd128Register scratch2) {
  vx(scratch1, scratch1, scratch1, Condition(0), Condition(0), Condition(3));
  vsum(dst, src, scratch1, Condition(0), Condition(0), Condition(1));
}
// Signed pairwise extending add producing I16x8 from I8x16.
// Expands EXT_ADD_PAIRWISE with lane-size code 0 and the vme / vmo multiplies.
// Both scratch registers are overwritten.
void TurboAssembler::I16x8ExtAddPairwiseI8x16S(
    Simd128Register dst, Simd128Register src, Simd128Register scratch1,
    Simd128Register scratch2) {
  EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, 0, vme, vmo)
}
// Unsigned pairwise extending add producing I16x8 from I8x16.
// Expands EXT_ADD_PAIRWISE with lane-size code 0 and the vmle / vmlo
// multiplies. Both scratch registers are overwritten.
void TurboAssembler::I16x8ExtAddPairwiseI8x16U(
    Simd128Register dst, Simd128Register src, Simd128Register scratch1,
    Simd128Register scratch2) {
  EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, 0, vmle, vmlo)
}
#undef EXT_ADD_PAIRWISE
// Vector LE Load and Transform instructions.
#ifdef V8_TARGET_BIG_ENDIAN
#define IS_BIG_ENDIAN true

View File

@ -1362,6 +1362,19 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
#undef PROTOTYPE_SIMD_ADD_SUB_SAT
#undef SIMD_ADD_SUB_SAT_LIST
// Names of the SIMD pairwise extending-add helpers declared below. V is
// applied once to each name.
#define SIMD_EXT_ADD_PAIRWISE_LIST(V) \
V(I32x4ExtAddPairwiseI16x8S) \
V(I32x4ExtAddPairwiseI16x8U) \
V(I16x8ExtAddPairwiseI8x16S) \
V(I16x8ExtAddPairwiseI8x16U)
// Declares one helper taking a destination register, a source register and two
// scratch registers. All four helpers share this signature.
#define PROTOTYPE_SIMD_EXT_ADD_PAIRWISE(name) \
void name(Simd128Register dst, Simd128Register src, \
Simd128Register scratch1, Simd128Register scratch2);
SIMD_EXT_ADD_PAIRWISE_LIST(PROTOTYPE_SIMD_EXT_ADD_PAIRWISE)
// Both macros are local to this group of declarations.
#undef PROTOTYPE_SIMD_EXT_ADD_PAIRWISE
#undef SIMD_EXT_ADD_PAIRWISE_LIST
// ---------------------------------------------------------------------------
// Pointer compression Support

View File

@ -2819,6 +2819,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
#undef EMIT_SIMD_ADD_SUB_SAT
#undef SIMD_ADD_SUB_SAT_LIST
// Opcodes of the SIMD pairwise extending adds. Each entry is both the suffix
// of a kS390_* opcode and the name of the assembler helper that emits it.
#define SIMD_EXT_ADD_PAIRWISE_LIST(V) \
  V(I32x4ExtAddPairwiseI16x8S)        \
  V(I32x4ExtAddPairwiseI16x8U)        \
  V(I16x8ExtAddPairwiseI8x16S)        \
  V(I16x8ExtAddPairwiseI8x16U)

// Emits one switch case per opcode. The helper receives the instruction's
// output register, its first input, kScratchDoubleReg as the first scratch
// and the instruction's temp 0 as the second scratch.
#define EMIT_SIMD_EXT_ADD_PAIRWISE(op)            \
  case kS390_##op: {                              \
    __ op(i.OutputSimd128Register(),              \
          i.InputSimd128Register(0),              \
          kScratchDoubleReg,                      \
          i.ToSimd128Register(instr->TempAt(0))); \
    break;                                        \
  }
      SIMD_EXT_ADD_PAIRWISE_LIST(EMIT_SIMD_EXT_ADD_PAIRWISE)
#undef EMIT_SIMD_EXT_ADD_PAIRWISE
#undef SIMD_EXT_ADD_PAIRWISE_LIST
// vector unary ops
case kS390_F32x4RecipApprox: {
__ mov(kScratchReg, Operand(1));
@ -2989,40 +3005,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(0), Condition(0), Condition(2));
break;
}
// Inline pairwise extending add for the cases below.
// Fills tempFPReg1 with the immediate 1 in every lane, multiplies the even and
// odd lanes of src by it (products land in kScratchDoubleReg and tempFPReg1),
// then adds the two product vectors using element size lane_size + 1.
// The macro declares the locals src, dst and tempFPReg1, so every use has to
// sit in its own braced scope. src must differ from tempFPReg1 (DCHECK-ed)
// because tempFPReg1 is overwritten while src is still being read.
#define EXT_ADD_PAIRWISE(lane_size, mul_even, mul_odd) \
Simd128Register src = i.InputSimd128Register(0); \
Simd128Register dst = i.OutputSimd128Register(); \
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); \
DCHECK_NE(src, tempFPReg1); \
__ vrepi(tempFPReg1, Operand(1), Condition(lane_size)); \
__ mul_even(kScratchDoubleReg, src, tempFPReg1, Condition(0), Condition(0), \
Condition(lane_size)); \
__ mul_odd(tempFPReg1, src, tempFPReg1, Condition(0), Condition(0), \
Condition(lane_size)); \
__ va(dst, kScratchDoubleReg, tempFPReg1, Condition(0), Condition(0), \
Condition(lane_size + 1));
// Signed I16x8 -> I32x4: lane-size code 1 with the vme / vmo multiplies.
case kS390_I32x4ExtAddPairwiseI16x8S: {
EXT_ADD_PAIRWISE(1, vme, vmo)
break;
}
// Unsigned I16x8 -> I32x4: does not use the macro. kScratchDoubleReg is
// cleared by XOR-ing it with itself and passed to vsum as the third operand,
// with src0 as the second.
case kS390_I32x4ExtAddPairwiseI16x8U: {
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
__ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
Condition(0), Condition(0), Condition(3));
__ vsum(dst, src0, kScratchDoubleReg, Condition(0), Condition(0),
Condition(1));
break;
}
// Signed I8x16 -> I16x8: lane-size code 0 with the vme / vmo multiplies.
case kS390_I16x8ExtAddPairwiseI8x16S: {
EXT_ADD_PAIRWISE(0, vme, vmo)
break;
}
// Unsigned I8x16 -> I16x8: lane-size code 0 with the vmle / vmlo multiplies.
case kS390_I16x8ExtAddPairwiseI8x16U: {
EXT_ADD_PAIRWISE(0, vmle, vmlo)
break;
}
#undef EXT_ADD_PAIRWISE
#define Q15_MUL_ROAUND(accumulator, unpack) \
__ unpack(tempFPReg1, src0, Condition(0), Condition(0), Condition(1)); \
__ unpack(accumulator, src1, Condition(0), Condition(0), Condition(1)); \

View File

@ -2541,6 +2541,32 @@ SIMD_ADD_SUB_SAT_LIST(EMIT_SIMD_ADD_SUB_SAT)
#undef EMIT_SIMD_ADD_SUB_SAT
#undef SIMD_ADD_SUB_SAT_LIST
// Liftoff entry points for the SIMD pairwise extending adds. Each entry pairs
// the suffix of the emit_* method with the assembler helper it forwards to.
#define SIMD_EXT_ADD_PAIRWISE_LIST(V)                         \
  V(i32x4_extadd_pairwise_i16x8_s, I32x4ExtAddPairwiseI16x8S) \
  V(i32x4_extadd_pairwise_i16x8_u, I32x4ExtAddPairwiseI16x8U) \
  V(i16x8_extadd_pairwise_i8x16_s, I16x8ExtAddPairwiseI8x16S) \
  V(i16x8_extadd_pairwise_i8x16_u, I16x8ExtAddPairwiseI8x16U)

// Defines LiftoffAssembler::emit_<name>(dst, src). The helper <op> is called
// with a result register that differs from the input, kScratchDoubleReg as
// the first scratch and a freshly acquired register as the second scratch.
#define EMIT_SIMD_EXT_ADD_PAIRWISE(name, op) \
  void LiftoffAssembler::emit_##name(LiftoffRegister dst, \
                                     LiftoffRegister src) { \
    Simd128Register input = src.fp(); \
    Simd128Register result = dst.fp(); \
    /* If dst aliases src, compute into a different unused register. */ \
    if (result == input) { \
      result = \
          GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(input)).fp(); \
    } \
    /* The second scratch must differ from both result and input. */ \
    Simd128Register extra = \
        GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(result, input)) \
            .fp(); \
    op(result, input, kScratchDoubleReg, extra); \
    /* Copy into dst when a substitute result register was used. */ \
    if (result != dst.fp()) { \
      vlr(dst.fp(), result, Condition(0), Condition(0), Condition(0)); \
    } \
  }
SIMD_EXT_ADD_PAIRWISE_LIST(EMIT_SIMD_EXT_ADD_PAIRWISE)
#undef EMIT_SIMD_EXT_ADD_PAIRWISE
#undef SIMD_EXT_ADD_PAIRWISE_LIST
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
Register offset_reg, uintptr_t offset_imm,
LoadType type,
@ -2600,31 +2626,11 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
bailout(kSimd, "i32x4_dot_i16x8_s");
}
// Stub: emits no code for i32x4.extadd_pairwise_i16x8_s and instead requests a
// bailout with reason kSimd. Both register arguments are ignored.
void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(
    LiftoffRegister dst, LiftoffRegister src) {
  bailout(kSimd, "i32x4.extadd_pairwise_i16x8_s");
}
// Stub: emits no code for i32x4.extadd_pairwise_i16x8_u and instead requests a
// bailout with reason kSimd. Both register arguments are ignored.
void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(
    LiftoffRegister dst, LiftoffRegister src) {
  bailout(kSimd, "i32x4.extadd_pairwise_i16x8_u");
}
// i16x8.bitmask: forwards to the I16x8BitMask assembler helper, passing the
// general-purpose register of dst, the vector register of src, and r0 and
// kScratchDoubleReg as scratch registers.
void LiftoffAssembler::emit_i16x8_bitmask(
    LiftoffRegister dst, LiftoffRegister src) {
  I16x8BitMask(dst.gp(),
               src.fp(),
               r0,
               kScratchDoubleReg);
}
// Stub: emits no code for i16x8.extadd_pairwise_i8x16_s and instead requests a
// bailout with reason kSimd. Both register arguments are ignored.
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(
    LiftoffRegister dst, LiftoffRegister src) {
  bailout(kSimd, "i16x8.extadd_pairwise_i8x16_s");
}
// Stub: emits no code for i16x8.extadd_pairwise_i8x16_u and instead requests a
// bailout with reason kSimd. Both register arguments are ignored.
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(
    LiftoffRegister dst, LiftoffRegister src) {
  bailout(kSimd, "i16x8.extadd_pairwise_i8x16_u");
}
void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {