PPC [liftoff]: Implement simd replace lane ops

Change-Id: I4bbe5be6c6f13ee5664fa231d8dd9d59aa0ef579
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3792166
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#82051}
This commit is contained in:
Milad Fa 2022-07-28 13:20:39 -04:00 committed by V8 LUCI CQ
parent cd1a6e7353
commit c373fc7be5
4 changed files with 171 additions and 90 deletions

View File

@ -3599,6 +3599,92 @@ void TurboAssembler::I8x16ExtractLaneS(Register dst, Simd128Register src,
extsb(dst, dst);
}
// Replaces one 64-bit floating point lane of a 128-bit vector.
//
// dst receives src1 with lane |imm_lane_idx| overwritten by the raw bits of
// the double held in src2. scratch1 (GPR) and scratch2 (vector) are used as
// temporaries; scratch2 is only written on the non-Power10 path.
void TurboAssembler::F64x2ReplaceLane(Simd128Register dst, Simd128Register src1,
                                      DoubleRegister src2, uint8_t imm_lane_idx,
                                      Register scratch1,
                                      Simd128Register scratch2) {
  constexpr int kLaneSizeInBytes = 8;
  constexpr int kLastLaneIndex = 1;
  // The insert instructions take a byte offset; the lane index is mirrored
  // (last lane maps to offset 0) before being scaled to bytes.
  const int byte_offset = (kLastLaneIndex - imm_lane_idx) * kLaneSizeInBytes;

  // Start from a copy of the input vector (vor of a register with itself
  // acts as a move) unless the result already lives in the same register.
  if (src1 != dst) {
    vor(dst, src1, src1);
  }

  // Get the bit pattern of the double into a general purpose register.
  MovDoubleToInt64(scratch1, src2);

  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
    // Power10 and later: insert straight from the general purpose register.
    vinsd(dst, scratch1, Operand(byte_offset));
    return;
  }

  // Otherwise stage the value in a vector register first.
  mtvsrd(scratch2, scratch1);
  vinsertd(dst, scratch2, Operand(byte_offset));
}
// Replaces one 32-bit floating point lane of a 128-bit vector.
//
// dst receives src1 with lane |imm_lane_idx| overwritten by the float held in
// src2. scratch1 (GPR) receives the float's integer bit pattern via
// MovFloatToInt, which itself needs the floating point temporary scratch2.
// scratch3 (vector) is only written on the non-Power10 path.
void TurboAssembler::F32x4ReplaceLane(Simd128Register dst, Simd128Register src1,
                                      DoubleRegister src2, uint8_t imm_lane_idx,
                                      Register scratch1,
                                      DoubleRegister scratch2,
                                      Simd128Register scratch3) {
  constexpr int kLaneSizeInBytes = 4;
  constexpr int kLastLaneIndex = 3;
  // The insert instructions take a byte offset; the lane index is mirrored
  // (last lane maps to offset 0) before being scaled to bytes.
  const int byte_offset = (kLastLaneIndex - imm_lane_idx) * kLaneSizeInBytes;

  // Start from a copy of the input vector unless it is already in dst.
  if (src1 != dst) {
    vor(dst, src1, src1);
  }

  MovFloatToInt(scratch1, src2, scratch2);

  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
    // Power10 and later: insert straight from the general purpose register.
    vinsw(dst, scratch1, Operand(byte_offset));
    return;
  }

  // Otherwise stage the value in a vector register first.
  mtvsrd(scratch3, scratch1);
  vinsertw(dst, scratch3, Operand(byte_offset));
}
// Replaces one 64-bit integer lane of a 128-bit vector.
//
// dst receives src1 with lane |imm_lane_idx| overwritten by the value in the
// general purpose register src2. scratch (vector) is only written on the
// non-Power10 path.
void TurboAssembler::I64x2ReplaceLane(Simd128Register dst, Simd128Register src1,
                                      Register src2, uint8_t imm_lane_idx,
                                      Simd128Register scratch) {
  constexpr int kLaneSizeInBytes = 8;
  constexpr int kLastLaneIndex = 1;
  // The insert instructions take a byte offset; the lane index is mirrored
  // (last lane maps to offset 0) before being scaled to bytes.
  const int byte_offset = (kLastLaneIndex - imm_lane_idx) * kLaneSizeInBytes;

  // Start from a copy of the input vector unless it is already in dst.
  if (src1 != dst) {
    vor(dst, src1, src1);
  }

  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
    // Power10 and later: insert straight from the general purpose register.
    vinsd(dst, src2, Operand(byte_offset));
    return;
  }

  // Otherwise stage the value in a vector register first.
  mtvsrd(scratch, src2);
  vinsertd(dst, scratch, Operand(byte_offset));
}
// Replaces one 32-bit integer lane of a 128-bit vector.
//
// dst receives src1 with lane |imm_lane_idx| overwritten by the value in the
// general purpose register src2. scratch (vector) is only written on the
// non-Power10 path.
void TurboAssembler::I32x4ReplaceLane(Simd128Register dst, Simd128Register src1,
                                      Register src2, uint8_t imm_lane_idx,
                                      Simd128Register scratch) {
  constexpr int kLaneSizeInBytes = 4;
  constexpr int kLastLaneIndex = 3;
  // The insert instructions take a byte offset; the lane index is mirrored
  // (last lane maps to offset 0) before being scaled to bytes.
  const int byte_offset = (kLastLaneIndex - imm_lane_idx) * kLaneSizeInBytes;

  // Start from a copy of the input vector unless it is already in dst.
  if (src1 != dst) {
    vor(dst, src1, src1);
  }

  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
    // Power10 and later: insert straight from the general purpose register.
    vinsw(dst, src2, Operand(byte_offset));
    return;
  }

  // Otherwise stage the value in a vector register first.
  mtvsrd(scratch, src2);
  vinsertw(dst, scratch, Operand(byte_offset));
}
// Replaces one 16-bit integer lane of a 128-bit vector.
//
// dst receives src1 with lane |imm_lane_idx| overwritten by the value in the
// general purpose register src2. The value is always staged through the
// vector register scratch; there is no CPU-feature dependent path here.
void TurboAssembler::I16x8ReplaceLane(Simd128Register dst, Simd128Register src1,
                                      Register src2, uint8_t imm_lane_idx,
                                      Simd128Register scratch) {
  constexpr int kLaneSizeInBytes = 2;
  constexpr int kLastLaneIndex = 7;
  // vinserth takes a byte offset; the lane index is mirrored (last lane maps
  // to offset 0) before being scaled to bytes.
  const int byte_offset = (kLastLaneIndex - imm_lane_idx) * kLaneSizeInBytes;

  // Start from a copy of the input vector unless it is already in dst.
  if (src1 != dst) {
    vor(dst, src1, src1);
  }

  mtvsrd(scratch, src2);
  vinserth(dst, scratch, Operand(byte_offset));
}
// Replaces one 8-bit integer lane of a 128-bit vector.
//
// dst receives src1 with lane |imm_lane_idx| overwritten by the value in the
// general purpose register src2. The value is always staged through the
// vector register scratch; there is no CPU-feature dependent path here.
void TurboAssembler::I8x16ReplaceLane(Simd128Register dst, Simd128Register src1,
                                      Register src2, uint8_t imm_lane_idx,
                                      Simd128Register scratch) {
  constexpr int kLastLaneIndex = 15;
  // Lanes are one byte wide, so the mirrored lane index is already the byte
  // offset expected by vinsertb.
  const int byte_offset = kLastLaneIndex - imm_lane_idx;

  // Start from a copy of the input vector unless it is already in dst.
  if (src1 != dst) {
    vor(dst, src1, src1);
  }

  mtvsrd(scratch, src2);
  vinsertb(dst, scratch, Operand(byte_offset));
}
Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
Register reg4, Register reg5,
Register reg6) {

View File

@ -1102,6 +1102,25 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
uint8_t imm_lane_idx, Simd128Register scratch);
void I8x16ExtractLaneS(Register dst, Simd128Register src,
uint8_t imm_lane_idx, Simd128Register scratch);
// SIMD replace-lane helpers. Each one produces in |dst| the vector |src1|
// with the lane selected by |imm_lane_idx| overwritten by the scalar |src2|.
// The trailing scratch arguments are temporaries supplied by the caller.

// 64-bit float lane; |src2| is a double register. Needs a general purpose
// scratch and a vector scratch.
void F64x2ReplaceLane(Simd128Register dst, Simd128Register src1,
DoubleRegister src2, uint8_t imm_lane_idx,
Register scratch1, Simd128Register scratch2);
// 32-bit float lane; |src2| is a floating point register. Needs a general
// purpose scratch, a floating point scratch and a vector scratch.
void F32x4ReplaceLane(Simd128Register dst, Simd128Register src1,
DoubleRegister src2, uint8_t imm_lane_idx,
Register scratch1, DoubleRegister scratch2,
Simd128Register scratch3);
// 64-bit integer lane; |src2| is a general purpose register.
void I64x2ReplaceLane(Simd128Register dst, Simd128Register src1,
Register src2, uint8_t imm_lane_idx,
Simd128Register scratch);
// 32-bit integer lane; |src2| is a general purpose register.
void I32x4ReplaceLane(Simd128Register dst, Simd128Register src1,
Register src2, uint8_t imm_lane_idx,
Simd128Register scratch);
// 16-bit integer lane; |src2| is a general purpose register.
void I16x8ReplaceLane(Simd128Register dst, Simd128Register src1,
Register src2, uint8_t imm_lane_idx,
Simd128Register scratch);
// 8-bit integer lane; |src2| is a general purpose register.
void I8x16ReplaceLane(Simd128Register dst, Simd128Register src1,
Register src2, uint8_t imm_lane_idx,
Simd128Register scratch);
private:
static const int kSmiShift = kSmiTagSize + kSmiShiftSize;

View File

@ -2263,74 +2263,44 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kPPC_F64x2ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
constexpr int lane_width_in_bytes = 8;
Simd128Register dst = i.OutputSimd128Register();
__ MovDoubleToInt64(r0, i.InputDoubleRegister(2));
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
__ vinsd(dst, r0, Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
} else {
__ mtvsrd(kScratchSimd128Reg, r0);
__ vinsertd(dst, kScratchSimd128Reg,
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
}
__ F64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputDoubleRegister(2), i.InputInt8(1), kScratchReg,
kScratchSimd128Reg);
break;
}
case kPPC_F32x4ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
constexpr int lane_width_in_bytes = 4;
Simd128Register dst = i.OutputSimd128Register();
__ MovFloatToInt(r0, i.InputDoubleRegister(2), kScratchDoubleReg);
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
__ vinsw(dst, r0, Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
} else {
__ mtvsrd(kScratchSimd128Reg, r0);
__ vinsertw(dst, kScratchSimd128Reg,
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
}
__ F32x4ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputDoubleRegister(2), i.InputInt8(1), kScratchReg,
kScratchDoubleReg, kScratchSimd128Reg);
break;
}
case kPPC_I64x2ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
constexpr int lane_width_in_bytes = 8;
Simd128Register dst = i.OutputSimd128Register();
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
__ vinsd(dst, i.InputRegister(2),
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
} else {
__ mtvsrd(kScratchSimd128Reg, i.InputRegister(2));
__ vinsertd(dst, kScratchSimd128Reg,
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
}
__ I64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(2), i.InputInt8(1),
kScratchSimd128Reg);
break;
}
case kPPC_I32x4ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
constexpr int lane_width_in_bytes = 4;
Simd128Register dst = i.OutputSimd128Register();
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
__ vinsw(dst, i.InputRegister(2),
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
} else {
__ mtvsrd(kScratchSimd128Reg, i.InputRegister(2));
__ vinsertw(dst, kScratchSimd128Reg,
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
}
__ I32x4ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(2), i.InputInt8(1),
kScratchSimd128Reg);
break;
}
case kPPC_I16x8ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
constexpr int lane_width_in_bytes = 2;
Simd128Register dst = i.OutputSimd128Register();
__ mtvsrd(kScratchSimd128Reg, i.InputRegister(2));
__ vinserth(dst, kScratchSimd128Reg,
Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
__ I16x8ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(2), i.InputInt8(1),
kScratchSimd128Reg);
break;
}
case kPPC_I8x16ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
Simd128Register dst = i.OutputSimd128Register();
__ mtvsrd(kScratchSimd128Reg, i.InputRegister(2));
__ vinsertb(dst, kScratchSimd128Reg, Operand(15 - i.InputInt8(1)));
__ I8x16ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(2), i.InputInt8(1),
kScratchSimd128Reg);
break;
}
case kPPC_F64x2Add: {

View File

@ -1826,6 +1826,54 @@ void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
kScratchSimd128Reg);
}
// Liftoff entry point for replacing a 64-bit float lane: unwraps the Liftoff
// registers and forwards to F64x2ReplaceLane with r0 and kScratchSimd128Reg
// as temporaries.
void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto result_vec = dst.fp().toSimd();
  const auto input_vec = src1.fp().toSimd();
  const auto lane_value = src2.fp();
  F64x2ReplaceLane(result_vec, input_vec, lane_value, imm_lane_idx, r0,
                   kScratchSimd128Reg);
}
// Liftoff entry point for replacing a 32-bit float lane: unwraps the Liftoff
// registers and forwards to F32x4ReplaceLane with r0, kScratchDoubleReg and
// kScratchSimd128Reg as temporaries.
void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto result_vec = dst.fp().toSimd();
  const auto input_vec = src1.fp().toSimd();
  const auto lane_value = src2.fp();
  F32x4ReplaceLane(result_vec, input_vec, lane_value, imm_lane_idx, r0,
                   kScratchDoubleReg, kScratchSimd128Reg);
}
// Liftoff entry point for replacing a 64-bit integer lane: unwraps the
// Liftoff registers and forwards to I64x2ReplaceLane with kScratchSimd128Reg
// as the vector temporary.
void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto result_vec = dst.fp().toSimd();
  const auto input_vec = src1.fp().toSimd();
  const auto lane_value = src2.gp();
  I64x2ReplaceLane(result_vec, input_vec, lane_value, imm_lane_idx,
                   kScratchSimd128Reg);
}
// Liftoff entry point for replacing a 32-bit integer lane: unwraps the
// Liftoff registers and forwards to I32x4ReplaceLane with kScratchSimd128Reg
// as the vector temporary.
void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto result_vec = dst.fp().toSimd();
  const auto input_vec = src1.fp().toSimd();
  const auto lane_value = src2.gp();
  I32x4ReplaceLane(result_vec, input_vec, lane_value, imm_lane_idx,
                   kScratchSimd128Reg);
}
// Liftoff entry point for replacing a 16-bit integer lane: unwraps the
// Liftoff registers and forwards to I16x8ReplaceLane with kScratchSimd128Reg
// as the vector temporary.
void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto result_vec = dst.fp().toSimd();
  const auto input_vec = src1.fp().toSimd();
  const auto lane_value = src2.gp();
  I16x8ReplaceLane(result_vec, input_vec, lane_value, imm_lane_idx,
                   kScratchSimd128Reg);
}
// Liftoff entry point for replacing an 8-bit integer lane: unwraps the
// Liftoff registers and forwards to I8x16ReplaceLane with kScratchSimd128Reg
// as the vector temporary.
void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto result_vec = dst.fp().toSimd();
  const auto input_vec = src1.fp().toSimd();
  const auto lane_value = src2.gp();
  I8x16ReplaceLane(result_vec, input_vec, lane_value, imm_lane_idx,
                   kScratchSimd128Reg);
}
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
Register offset_reg, uintptr_t offset_imm,
LoadType type,
@ -1895,13 +1943,6 @@ void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
bailout(kRelaxedSimd, "emit_s128_relaxed_laneselect");
}
// Stub version of emit_f64x2_replace_lane: ignores all of its arguments and
// only calls bailout() with reason kUnsupportedArchitecture.
// NOTE(review): this listing also contains a definition of the same function
// that forwards to F64x2ReplaceLane; this stub looks like the side of the
// diff being removed — confirm before relying on it.
void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_f64x2replacelane");
}
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kUnsupportedArchitecture, "emit_f64x2_abs");
@ -2008,13 +2049,6 @@ void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
bailout(kSimd, "f64x2.promote_low_f32x4");
}
// Stub version of emit_f32x4_replace_lane: ignores all of its arguments and
// only calls bailout() with reason kUnsupportedArchitecture.
// NOTE(review): this listing also contains a definition of the same function
// that forwards to F32x4ReplaceLane; this stub looks like the side of the
// diff being removed — confirm before relying on it.
void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_f32x4replacelane");
}
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kUnsupportedArchitecture, "emit_f32x4_abs");
@ -2106,13 +2140,6 @@ void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
bailout(kSimd, "pmax unimplemented");
}
// Stub version of emit_i64x2_replace_lane: ignores all of its arguments and
// only calls bailout() with reason kUnsupportedArchitecture.
// NOTE(review): this listing also contains a definition of the same function
// that forwards to I64x2ReplaceLane; this stub looks like the side of the
// diff being removed — confirm before relying on it.
void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_i64x2replacelane");
}
void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kUnsupportedArchitecture, "emit_i64x2neg");
@ -2219,13 +2246,6 @@ void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
bailout(kSimd, "i64x2_extmul_high_i32x4_u unsupported");
}
// Stub version of emit_i32x4_replace_lane: ignores all of its arguments and
// only calls bailout() with reason kUnsupportedArchitecture.
// NOTE(review): this listing also contains a definition of the same function
// that forwards to I32x4ReplaceLane; this stub looks like the side of the
// diff being removed — confirm before relying on it.
void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_i32x4replacelane");
}
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kUnsupportedArchitecture, "emit_i32x4neg");
@ -2462,13 +2482,6 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
bailout(kUnsupportedArchitecture, "emit_i16x8_max_u");
}
// Stub version of emit_i16x8_replace_lane: ignores all of its arguments and
// only calls bailout() with reason kUnsupportedArchitecture.
// NOTE(review): this listing also contains a definition of the same function
// that forwards to I16x8ReplaceLane; this stub looks like the side of the
// diff being removed — confirm before relying on it.
void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_i16x8replacelane");
}
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i16x8.extadd_pairwise_i8x16_s");
@ -2541,13 +2554,6 @@ void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
bailout(kSimd, "i8x16.popcnt");
}
// Stub version of emit_i8x16_replace_lane: ignores all of its arguments and
// only calls bailout() with reason kUnsupportedArchitecture.
// NOTE(review): this listing also contains a definition of the same function
// that forwards to I8x16ReplaceLane; this stub looks like the side of the
// diff being removed — confirm before relying on it.
void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_i8x16replacelane");
}
void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kUnsupportedArchitecture, "emit_i8x16neg");