PPC [liftoff]: Implement simd replace lane ops
Change-Id: I4bbe5be6c6f13ee5664fa231d8dd9d59aa0ef579 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3792166 Reviewed-by: Junliang Yan <junyan@redhat.com> Commit-Queue: Milad Farazmand <mfarazma@redhat.com> Cr-Commit-Position: refs/heads/main@{#82051}
This commit is contained in:
parent
cd1a6e7353
commit
c373fc7be5
@ -3599,6 +3599,92 @@ void TurboAssembler::I8x16ExtractLaneS(Register dst, Simd128Register src,
|
||||
extsb(dst, dst);
|
||||
}
|
||||
|
||||
// Writes `src1` into `dst` with the 64-bit lane `imm_lane_idx` overwritten by
// the bit pattern of the double held in `src2`.
//   scratch1 - general-purpose register that receives the bits of `src2`.
//   scratch2 - vector register used only when PPC_10_PLUS is not supported.
void TurboAssembler::F64x2ReplaceLane(Simd128Register dst, Simd128Register src1,
                                      DoubleRegister src2, uint8_t imm_lane_idx,
                                      Register scratch1,
                                      Simd128Register scratch2) {
  constexpr int kLaneBytes = 8;
  // The lane index is mirrored (1 - idx) before being scaled to a byte offset.
  const int byte_offset = (1 - imm_lane_idx) * kLaneBytes;

  // Start from a copy of the source vector unless it already lives in dst.
  if (dst != src1) vor(dst, src1, src1);

  MovDoubleToInt64(scratch1, src2);

  if (!CpuFeatures::IsSupported(PPC_10_PLUS)) {
    // Without PPC_10_PLUS the value is first staged in a vector register.
    mtvsrd(scratch2, scratch1);
    vinsertd(dst, scratch2, Operand(byte_offset));
    return;
  }
  // With PPC_10_PLUS the insert takes the general-purpose register directly.
  vinsd(dst, scratch1, Operand(byte_offset));
}
|
||||
|
||||
// Writes `src1` into `dst` with the 32-bit lane `imm_lane_idx` overwritten by
// the value MovFloatToInt extracts from `src2`.
//   scratch1 - general-purpose register that receives the converted value.
//   scratch2 - double register handed to MovFloatToInt as its scratch.
//   scratch3 - vector register used only when PPC_10_PLUS is not supported.
void TurboAssembler::F32x4ReplaceLane(Simd128Register dst, Simd128Register src1,
                                      DoubleRegister src2, uint8_t imm_lane_idx,
                                      Register scratch1,
                                      DoubleRegister scratch2,
                                      Simd128Register scratch3) {
  constexpr int kLaneBytes = 4;
  // The lane index is mirrored (3 - idx) before being scaled to a byte offset.
  const int byte_offset = (3 - imm_lane_idx) * kLaneBytes;

  // Start from a copy of the source vector unless it already lives in dst.
  if (dst != src1) vor(dst, src1, src1);

  MovFloatToInt(scratch1, src2, scratch2);

  if (!CpuFeatures::IsSupported(PPC_10_PLUS)) {
    // Without PPC_10_PLUS the value is first staged in a vector register.
    mtvsrd(scratch3, scratch1);
    vinsertw(dst, scratch3, Operand(byte_offset));
    return;
  }
  // With PPC_10_PLUS the insert takes the general-purpose register directly.
  vinsw(dst, scratch1, Operand(byte_offset));
}
|
||||
|
||||
// Writes `src1` into `dst` with the 64-bit lane `imm_lane_idx` overwritten by
// the general-purpose register `src2`.
//   scratch - vector register used only when PPC_10_PLUS is not supported.
void TurboAssembler::I64x2ReplaceLane(Simd128Register dst, Simd128Register src1,
                                      Register src2, uint8_t imm_lane_idx,
                                      Simd128Register scratch) {
  constexpr int kLaneBytes = 8;
  // The lane index is mirrored (1 - idx) before being scaled to a byte offset.
  const int byte_offset = (1 - imm_lane_idx) * kLaneBytes;

  // Start from a copy of the source vector unless it already lives in dst.
  if (dst != src1) vor(dst, src1, src1);

  if (!CpuFeatures::IsSupported(PPC_10_PLUS)) {
    // Without PPC_10_PLUS the value is first staged in a vector register.
    mtvsrd(scratch, src2);
    vinsertd(dst, scratch, Operand(byte_offset));
    return;
  }
  // With PPC_10_PLUS the insert takes the general-purpose register directly.
  vinsd(dst, src2, Operand(byte_offset));
}
|
||||
|
||||
// Writes `src1` into `dst` with the 32-bit lane `imm_lane_idx` overwritten by
// the general-purpose register `src2`.
//   scratch - vector register used only when PPC_10_PLUS is not supported.
void TurboAssembler::I32x4ReplaceLane(Simd128Register dst, Simd128Register src1,
                                      Register src2, uint8_t imm_lane_idx,
                                      Simd128Register scratch) {
  constexpr int kLaneBytes = 4;
  // The lane index is mirrored (3 - idx) before being scaled to a byte offset.
  const int byte_offset = (3 - imm_lane_idx) * kLaneBytes;

  // Start from a copy of the source vector unless it already lives in dst.
  if (dst != src1) vor(dst, src1, src1);

  if (!CpuFeatures::IsSupported(PPC_10_PLUS)) {
    // Without PPC_10_PLUS the value is first staged in a vector register.
    mtvsrd(scratch, src2);
    vinsertw(dst, scratch, Operand(byte_offset));
    return;
  }
  // With PPC_10_PLUS the insert takes the general-purpose register directly.
  vinsw(dst, src2, Operand(byte_offset));
}
|
||||
|
||||
// Writes `src1` into `dst` with the 16-bit lane `imm_lane_idx` overwritten by
// the general-purpose register `src2`.
//   scratch - vector register used to stage `src2` before the insert.
void TurboAssembler::I16x8ReplaceLane(Simd128Register dst, Simd128Register src1,
                                      Register src2, uint8_t imm_lane_idx,
                                      Simd128Register scratch) {
  constexpr int kLaneBytes = 2;
  // The lane index is mirrored (7 - idx) before being scaled to a byte offset.
  const int byte_offset = (7 - imm_lane_idx) * kLaneBytes;

  // Start from a copy of the source vector unless it already lives in dst.
  if (dst != src1) vor(dst, src1, src1);

  mtvsrd(scratch, src2);
  vinserth(dst, scratch, Operand(byte_offset));
}
|
||||
|
||||
// Writes `src1` into `dst` with the byte lane `imm_lane_idx` overwritten by
// the general-purpose register `src2`.
//   scratch - vector register used to stage `src2` before the insert.
void TurboAssembler::I8x16ReplaceLane(Simd128Register dst, Simd128Register src1,
                                      Register src2, uint8_t imm_lane_idx,
                                      Simd128Register scratch) {
  // Lanes are one byte wide, so the mirrored index (15 - idx) is already the
  // byte offset.
  const int byte_offset = 15 - imm_lane_idx;

  // Start from a copy of the source vector unless it already lives in dst.
  if (dst != src1) vor(dst, src1, src1);

  mtvsrd(scratch, src2);
  vinsertb(dst, scratch, Operand(byte_offset));
}
|
||||
|
||||
Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
|
||||
Register reg4, Register reg5,
|
||||
Register reg6) {
|
||||
|
@ -1102,6 +1102,25 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
uint8_t imm_lane_idx, Simd128Register scratch);
|
||||
void I8x16ExtractLaneS(Register dst, Simd128Register src,
|
||||
uint8_t imm_lane_idx, Simd128Register scratch);
|
||||
// Lane-replace helpers. Each one copies `src1` into `dst` when the two
// registers differ, then inserts `src2` into `dst` at a byte offset derived
// from `imm_lane_idx`. The trailing parameters are scratch registers.

// 64-bit float lane: `src2` is a DoubleRegister; needs a GPR scratch and a
// vector scratch.
void F64x2ReplaceLane(Simd128Register dst, Simd128Register src1,
|
||||
DoubleRegister src2, uint8_t imm_lane_idx,
|
||||
Register scratch1, Simd128Register scratch2);
|
||||
// 32-bit float lane: `src2` is a DoubleRegister; needs a GPR scratch, a
// double scratch and a vector scratch.
void F32x4ReplaceLane(Simd128Register dst, Simd128Register src1,
|
||||
DoubleRegister src2, uint8_t imm_lane_idx,
|
||||
Register scratch1, DoubleRegister scratch2,
|
||||
Simd128Register scratch3);
|
||||
// 64-bit integer lane: `src2` is a general-purpose register.
void I64x2ReplaceLane(Simd128Register dst, Simd128Register src1,
|
||||
Register src2, uint8_t imm_lane_idx,
|
||||
Simd128Register scratch);
|
||||
// 32-bit integer lane: `src2` is a general-purpose register.
void I32x4ReplaceLane(Simd128Register dst, Simd128Register src1,
|
||||
Register src2, uint8_t imm_lane_idx,
|
||||
Simd128Register scratch);
|
||||
// 16-bit integer lane: `src2` is a general-purpose register.
void I16x8ReplaceLane(Simd128Register dst, Simd128Register src1,
|
||||
Register src2, uint8_t imm_lane_idx,
|
||||
Simd128Register scratch);
|
||||
// 8-bit integer lane: `src2` is a general-purpose register.
void I8x16ReplaceLane(Simd128Register dst, Simd128Register src1,
|
||||
Register src2, uint8_t imm_lane_idx,
|
||||
Simd128Register scratch);
|
||||
|
||||
private:
|
||||
static const int kSmiShift = kSmiTagSize + kSmiShiftSize;
|
||||
|
@ -2263,74 +2263,44 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
case kPPC_F64x2ReplaceLane: {
  // The output register is expected to be the same as the first input.
  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
  // Input 1 is the lane index immediate, input 2 the replacement double.
  // The TurboAssembler helper emits the complete move + insert sequence, so
  // no inline copy of that sequence is emitted here (doing both would insert
  // the same lane twice).
  __ F64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                      i.InputDoubleRegister(2), i.InputInt8(1), kScratchReg,
                      kScratchSimd128Reg);
  break;
}
|
||||
case kPPC_F32x4ReplaceLane: {
  // The output register is expected to be the same as the first input.
  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
  // Input 1 is the lane index immediate, input 2 the replacement float.
  // The TurboAssembler helper emits the complete move + insert sequence, so
  // no inline copy of that sequence is emitted here (doing both would insert
  // the same lane twice).
  __ F32x4ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                      i.InputDoubleRegister(2), i.InputInt8(1), kScratchReg,
                      kScratchDoubleReg, kScratchSimd128Reg);
  break;
}
|
||||
case kPPC_I64x2ReplaceLane: {
  // The output register is expected to be the same as the first input.
  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
  // Input 1 is the lane index immediate, input 2 the replacement value.
  // The TurboAssembler helper emits the complete insert sequence, so no
  // inline copy of that sequence is emitted here (doing both would insert
  // the same lane twice).
  __ I64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                      i.InputRegister(2), i.InputInt8(1),
                      kScratchSimd128Reg);
  break;
}
|
||||
case kPPC_I32x4ReplaceLane: {
  // The output register is expected to be the same as the first input.
  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
  // Input 1 is the lane index immediate, input 2 the replacement value.
  // The TurboAssembler helper emits the complete insert sequence, so no
  // inline copy of that sequence is emitted here (doing both would insert
  // the same lane twice).
  __ I32x4ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                      i.InputRegister(2), i.InputInt8(1),
                      kScratchSimd128Reg);
  break;
}
|
||||
case kPPC_I16x8ReplaceLane: {
  // The output register is expected to be the same as the first input.
  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
  // Input 1 is the lane index immediate, input 2 the replacement value.
  // The TurboAssembler helper emits the complete insert sequence, so no
  // inline copy of that sequence is emitted here (doing both would insert
  // the same lane twice).
  __ I16x8ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                      i.InputRegister(2), i.InputInt8(1),
                      kScratchSimd128Reg);
  break;
}
|
||||
case kPPC_I8x16ReplaceLane: {
  // The output register is expected to be the same as the first input.
  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
  // Input 1 is the lane index immediate, input 2 the replacement value.
  // The TurboAssembler helper emits the complete insert sequence, so no
  // inline copy of that sequence is emitted here (doing both would insert
  // the same lane twice).
  __ I8x16ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                      i.InputRegister(2), i.InputInt8(1),
                      kScratchSimd128Reg);
  break;
}
|
||||
case kPPC_F64x2Add: {
|
||||
|
@ -1826,6 +1826,54 @@ void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
|
||||
kScratchSimd128Reg);
|
||||
}
|
||||
|
||||
// Forwards to F64x2ReplaceLane, using r0 and kScratchSimd128Reg as the
// scratch registers.
void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto result = dst.fp().toSimd();
  const auto vector = src1.fp().toSimd();
  const auto lane_value = src2.fp();
  F64x2ReplaceLane(result, vector, lane_value, imm_lane_idx, r0,
                   kScratchSimd128Reg);
}
|
||||
|
||||
// Forwards to F32x4ReplaceLane, using r0, kScratchDoubleReg and
// kScratchSimd128Reg as the scratch registers.
void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto result = dst.fp().toSimd();
  const auto vector = src1.fp().toSimd();
  const auto lane_value = src2.fp();
  F32x4ReplaceLane(result, vector, lane_value, imm_lane_idx, r0,
                   kScratchDoubleReg, kScratchSimd128Reg);
}
|
||||
|
||||
// Forwards to I64x2ReplaceLane, using kScratchSimd128Reg as the scratch
// register.
void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto result = dst.fp().toSimd();
  const auto vector = src1.fp().toSimd();
  const auto lane_value = src2.gp();
  I64x2ReplaceLane(result, vector, lane_value, imm_lane_idx,
                   kScratchSimd128Reg);
}
|
||||
|
||||
// Forwards to I32x4ReplaceLane, using kScratchSimd128Reg as the scratch
// register.
void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto result = dst.fp().toSimd();
  const auto vector = src1.fp().toSimd();
  const auto lane_value = src2.gp();
  I32x4ReplaceLane(result, vector, lane_value, imm_lane_idx,
                   kScratchSimd128Reg);
}
|
||||
|
||||
// Forwards to I16x8ReplaceLane, using kScratchSimd128Reg as the scratch
// register.
void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto result = dst.fp().toSimd();
  const auto vector = src1.fp().toSimd();
  const auto lane_value = src2.gp();
  I16x8ReplaceLane(result, vector, lane_value, imm_lane_idx,
                   kScratchSimd128Reg);
}
|
||||
|
||||
// Forwards to I8x16ReplaceLane, using kScratchSimd128Reg as the scratch
// register.
void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  const auto result = dst.fp().toSimd();
  const auto vector = src1.fp().toSimd();
  const auto lane_value = src2.gp();
  I8x16ReplaceLane(result, vector, lane_value, imm_lane_idx,
                   kScratchSimd128Reg);
}
|
||||
|
||||
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
|
||||
Register offset_reg, uintptr_t offset_imm,
|
||||
LoadType type,
|
||||
@ -1895,13 +1943,6 @@ void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
|
||||
bailout(kRelaxedSimd, "emit_s128_relaxed_laneselect");
|
||||
}
|
||||
|
||||
// Bailout stub: reports kUnsupportedArchitecture and emits no code.
// NOTE(review): a non-bailout definition of this same function appears
// earlier in this listing; this stub looks like the removed side of the
// diff. Confirm it is deleted so the function is not defined twice.
void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
|
||||
LiftoffRegister src1,
|
||||
LiftoffRegister src2,
|
||||
uint8_t imm_lane_idx) {
|
||||
bailout(kUnsupportedArchitecture, "emit_f64x2replacelane");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
bailout(kUnsupportedArchitecture, "emit_f64x2_abs");
|
||||
@ -2008,13 +2049,6 @@ void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
|
||||
bailout(kSimd, "f64x2.promote_low_f32x4");
|
||||
}
|
||||
|
||||
// Bailout stub: reports kUnsupportedArchitecture and emits no code.
// NOTE(review): a non-bailout definition of this same function appears
// earlier in this listing; this stub looks like the removed side of the
// diff. Confirm it is deleted so the function is not defined twice.
void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
|
||||
LiftoffRegister src1,
|
||||
LiftoffRegister src2,
|
||||
uint8_t imm_lane_idx) {
|
||||
bailout(kUnsupportedArchitecture, "emit_f32x4replacelane");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
bailout(kUnsupportedArchitecture, "emit_f32x4_abs");
|
||||
@ -2106,13 +2140,6 @@ void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
bailout(kSimd, "pmax unimplemented");
|
||||
}
|
||||
|
||||
// Bailout stub: reports kUnsupportedArchitecture and emits no code.
// NOTE(review): a non-bailout definition of this same function appears
// earlier in this listing; this stub looks like the removed side of the
// diff. Confirm it is deleted so the function is not defined twice.
void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
|
||||
LiftoffRegister src1,
|
||||
LiftoffRegister src2,
|
||||
uint8_t imm_lane_idx) {
|
||||
bailout(kUnsupportedArchitecture, "emit_i64x2replacelane");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
bailout(kUnsupportedArchitecture, "emit_i64x2neg");
|
||||
@ -2219,13 +2246,6 @@ void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
|
||||
bailout(kSimd, "i64x2_extmul_high_i32x4_u unsupported");
|
||||
}
|
||||
|
||||
// Bailout stub: reports kUnsupportedArchitecture and emits no code.
// NOTE(review): a non-bailout definition of this same function appears
// earlier in this listing; this stub looks like the removed side of the
// diff. Confirm it is deleted so the function is not defined twice.
void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
|
||||
LiftoffRegister src1,
|
||||
LiftoffRegister src2,
|
||||
uint8_t imm_lane_idx) {
|
||||
bailout(kUnsupportedArchitecture, "emit_i32x4replacelane");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
bailout(kUnsupportedArchitecture, "emit_i32x4neg");
|
||||
@ -2462,13 +2482,6 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
|
||||
bailout(kUnsupportedArchitecture, "emit_i16x8_max_u");
|
||||
}
|
||||
|
||||
// Bailout stub: reports kUnsupportedArchitecture and emits no code.
// NOTE(review): a non-bailout definition of this same function appears
// earlier in this listing; this stub looks like the removed side of the
// diff. Confirm it is deleted so the function is not defined twice.
void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
|
||||
LiftoffRegister src1,
|
||||
LiftoffRegister src2,
|
||||
uint8_t imm_lane_idx) {
|
||||
bailout(kUnsupportedArchitecture, "emit_i16x8replacelane");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
bailout(kSimd, "i16x8.extadd_pairwise_i8x16_s");
|
||||
@ -2541,13 +2554,6 @@ void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
|
||||
bailout(kSimd, "i8x16.popcnt");
|
||||
}
|
||||
|
||||
// Bailout stub: reports kUnsupportedArchitecture and emits no code.
// NOTE(review): a non-bailout definition of this same function appears
// earlier in this listing; this stub looks like the removed side of the
// diff. Confirm it is deleted so the function is not defined twice.
void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
|
||||
LiftoffRegister src1,
|
||||
LiftoffRegister src2,
|
||||
uint8_t imm_lane_idx) {
|
||||
bailout(kUnsupportedArchitecture, "emit_i8x16replacelane");
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
bailout(kUnsupportedArchitecture, "emit_i8x16neg");
|
||||
|
Loading…
Reference in New Issue
Block a user