# Patch summary: PPC Wasm SIMD "replace lane" helpers shared by TurboFan and Liftoff.
#
# * src/codegen/ppc/macro-assembler-ppc.{cc,h}
#   Adds six TurboAssembler helpers: F64x2ReplaceLane, F32x4ReplaceLane,
#   I64x2ReplaceLane, I32x4ReplaceLane, I16x8ReplaceLane and I8x16ReplaceLane.
#   Each helper copies src1 into dst with `vor` when the two registers differ,
#   then inserts src2 into dst at an immediate byte offset computed from the lane:
#       F64x2 / I64x2 : (1 - imm_lane_idx) * 8
#       F32x4 / I32x4 : (3 - imm_lane_idx) * 4
#       I16x8         : (7 - imm_lane_idx) * 2
#       I8x16         : 15 - imm_lane_idx
#   The float variants first move src2 into the GPR scratch1 (MovDoubleToInt64 for
#   F64x2, MovFloatToInt with a DoubleRegister scratch for F32x4).
#   The 64-bit and 32-bit variants emit vinsd / vinsw directly from the GPR when
#   CpuFeatures::IsSupported(PPC_10_PLUS); otherwise they move the GPR into the
#   Simd128 scratch with mtvsrd and use vinsertd / vinsertw.
#   The 16-bit and 8-bit variants always use mtvsrd followed by vinserth / vinsertb.
#   No range check on imm_lane_idx is visible in these helpers; presumably callers
#   guarantee a valid lane index -- TODO confirm.
#
# * src/compiler/backend/ppc/code-generator-ppc.cc
#   The six kPPC_*ReplaceLane cases drop their inline instruction sequences and call
#   the new helpers. Each case keeps its DCHECK_EQ(output register, input register 0),
#   so on this path dst and src1 are expected to be the same register and the helpers'
#   `vor` copy should not be emitted.
#   NOTE(review): the removed F64x2 / F32x4 sequences used r0 as the GPR temporary; the
#   new calls pass kScratchReg, while the Liftoff callers in this patch pass r0.
#   Confirm that the change of temporary register is intended.
#
# * src/wasm/baseline/ppc/liftoff-assembler-ppc.h
#   The six LiftoffAssembler::emit_*_replace_lane functions that previously called
#   bailout(kUnsupportedArchitecture, ...) are removed. They are re-added ahead of
#   LiftoffAssembler::LoadTransform as calls into the new helpers, passing
#   kScratchSimd128Reg as the SIMD scratch (plus r0, and kScratchDoubleReg for F32x4,
#   in the float variants).
#
diff --git a/src/codegen/ppc/macro-assembler-ppc.cc b/src/codegen/ppc/macro-assembler-ppc.cc index 6a80e63d88..0c74d2bba2 100644 --- a/src/codegen/ppc/macro-assembler-ppc.cc +++ b/src/codegen/ppc/macro-assembler-ppc.cc @@ -3599,6 +3599,92 @@ void TurboAssembler::I8x16ExtractLaneS(Register dst, Simd128Register src, extsb(dst, dst); } +void TurboAssembler::F64x2ReplaceLane(Simd128Register dst, Simd128Register src1, + DoubleRegister src2, uint8_t imm_lane_idx, + Register scratch1, + Simd128Register scratch2) { + constexpr int lane_width_in_bytes = 8; + if (src1 != dst) { + vor(dst, src1, src1); + } + MovDoubleToInt64(scratch1, src2); + if (CpuFeatures::IsSupported(PPC_10_PLUS)) { + vinsd(dst, scratch1, Operand((1 - imm_lane_idx) * lane_width_in_bytes)); + } else { + mtvsrd(scratch2, scratch1); + vinsertd(dst, scratch2, Operand((1 - imm_lane_idx) * lane_width_in_bytes)); + } +} + +void TurboAssembler::F32x4ReplaceLane(Simd128Register dst, Simd128Register src1, + DoubleRegister src2, uint8_t imm_lane_idx, + Register scratch1, + DoubleRegister scratch2, + Simd128Register scratch3) { + constexpr int lane_width_in_bytes = 4; + if (src1 != dst) { + vor(dst, src1, src1); + } + MovFloatToInt(scratch1, src2, scratch2); + if (CpuFeatures::IsSupported(PPC_10_PLUS)) { + vinsw(dst, scratch1, Operand((3 - imm_lane_idx) * lane_width_in_bytes)); + } else { + mtvsrd(scratch3, scratch1); + vinsertw(dst, scratch3, Operand((3 - imm_lane_idx) * lane_width_in_bytes)); + } +} + +void TurboAssembler::I64x2ReplaceLane(Simd128Register dst, Simd128Register src1, + Register src2, uint8_t imm_lane_idx, + Simd128Register scratch) { + constexpr int lane_width_in_bytes = 8; + if (src1 != dst) { + vor(dst, src1, src1); + } + if (CpuFeatures::IsSupported(PPC_10_PLUS)) { + vinsd(dst, src2, Operand((1 - imm_lane_idx) * lane_width_in_bytes)); + } else { + mtvsrd(scratch, src2); + vinsertd(dst, scratch, Operand((1 - imm_lane_idx) * lane_width_in_bytes)); + } +} + +void 
TurboAssembler::I32x4ReplaceLane(Simd128Register dst, Simd128Register src1, + Register src2, uint8_t imm_lane_idx, + Simd128Register scratch) { + constexpr int lane_width_in_bytes = 4; + if (src1 != dst) { + vor(dst, src1, src1); + } + if (CpuFeatures::IsSupported(PPC_10_PLUS)) { + vinsw(dst, src2, Operand((3 - imm_lane_idx) * lane_width_in_bytes)); + } else { + mtvsrd(scratch, src2); + vinsertw(dst, scratch, Operand((3 - imm_lane_idx) * lane_width_in_bytes)); + } +} + +void TurboAssembler::I16x8ReplaceLane(Simd128Register dst, Simd128Register src1, + Register src2, uint8_t imm_lane_idx, + Simd128Register scratch) { + constexpr int lane_width_in_bytes = 2; + if (src1 != dst) { + vor(dst, src1, src1); + } + mtvsrd(scratch, src2); + vinserth(dst, scratch, Operand((7 - imm_lane_idx) * lane_width_in_bytes)); +} + +void TurboAssembler::I8x16ReplaceLane(Simd128Register dst, Simd128Register src1, + Register src2, uint8_t imm_lane_idx, + Simd128Register scratch) { + if (src1 != dst) { + vor(dst, src1, src1); + } + mtvsrd(scratch, src2); + vinsertb(dst, scratch, Operand(15 - imm_lane_idx)); +} + Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3, Register reg4, Register reg5, Register reg6) { diff --git a/src/codegen/ppc/macro-assembler-ppc.h b/src/codegen/ppc/macro-assembler-ppc.h index 9bcd3e2d29..b316ff40f5 100644 --- a/src/codegen/ppc/macro-assembler-ppc.h +++ b/src/codegen/ppc/macro-assembler-ppc.h @@ -1102,6 +1102,25 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { uint8_t imm_lane_idx, Simd128Register scratch); void I8x16ExtractLaneS(Register dst, Simd128Register src, uint8_t imm_lane_idx, Simd128Register scratch); + void F64x2ReplaceLane(Simd128Register dst, Simd128Register src1, + DoubleRegister src2, uint8_t imm_lane_idx, + Register scratch1, Simd128Register scratch2); + void F32x4ReplaceLane(Simd128Register dst, Simd128Register src1, + DoubleRegister src2, uint8_t imm_lane_idx, + Register scratch1, DoubleRegister 
scratch2, + Simd128Register scratch3); + void I64x2ReplaceLane(Simd128Register dst, Simd128Register src1, + Register src2, uint8_t imm_lane_idx, + Simd128Register scratch); + void I32x4ReplaceLane(Simd128Register dst, Simd128Register src1, + Register src2, uint8_t imm_lane_idx, + Simd128Register scratch); + void I16x8ReplaceLane(Simd128Register dst, Simd128Register src1, + Register src2, uint8_t imm_lane_idx, + Simd128Register scratch); + void I8x16ReplaceLane(Simd128Register dst, Simd128Register src1, + Register src2, uint8_t imm_lane_idx, + Simd128Register scratch); private: static const int kSmiShift = kSmiTagSize + kSmiShiftSize; diff --git a/src/compiler/backend/ppc/code-generator-ppc.cc b/src/compiler/backend/ppc/code-generator-ppc.cc index 5206b56d76..741a7cf34c 100644 --- a/src/compiler/backend/ppc/code-generator-ppc.cc +++ b/src/compiler/backend/ppc/code-generator-ppc.cc @@ -2263,74 +2263,44 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kPPC_F64x2ReplaceLane: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - constexpr int lane_width_in_bytes = 8; - Simd128Register dst = i.OutputSimd128Register(); - __ MovDoubleToInt64(r0, i.InputDoubleRegister(2)); - if (CpuFeatures::IsSupported(PPC_10_PLUS)) { - __ vinsd(dst, r0, Operand((1 - i.InputInt8(1)) * lane_width_in_bytes)); - } else { - __ mtvsrd(kScratchSimd128Reg, r0); - __ vinsertd(dst, kScratchSimd128Reg, - Operand((1 - i.InputInt8(1)) * lane_width_in_bytes)); - } + __ F64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputDoubleRegister(2), i.InputInt8(1), kScratchReg, + kScratchSimd128Reg); break; } case kPPC_F32x4ReplaceLane: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - constexpr int lane_width_in_bytes = 4; - Simd128Register dst = i.OutputSimd128Register(); - __ MovFloatToInt(r0, i.InputDoubleRegister(2), kScratchDoubleReg); - if (CpuFeatures::IsSupported(PPC_10_PLUS)) { - __ vinsw(dst, r0, Operand((3 - 
i.InputInt8(1)) * lane_width_in_bytes)); - } else { - __ mtvsrd(kScratchSimd128Reg, r0); - __ vinsertw(dst, kScratchSimd128Reg, - Operand((3 - i.InputInt8(1)) * lane_width_in_bytes)); - } + __ F32x4ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputDoubleRegister(2), i.InputInt8(1), kScratchReg, + kScratchDoubleReg, kScratchSimd128Reg); break; } case kPPC_I64x2ReplaceLane: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - constexpr int lane_width_in_bytes = 8; - Simd128Register dst = i.OutputSimd128Register(); - if (CpuFeatures::IsSupported(PPC_10_PLUS)) { - __ vinsd(dst, i.InputRegister(2), - Operand((1 - i.InputInt8(1)) * lane_width_in_bytes)); - } else { - __ mtvsrd(kScratchSimd128Reg, i.InputRegister(2)); - __ vinsertd(dst, kScratchSimd128Reg, - Operand((1 - i.InputInt8(1)) * lane_width_in_bytes)); - } + __ I64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputRegister(2), i.InputInt8(1), + kScratchSimd128Reg); break; } case kPPC_I32x4ReplaceLane: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - constexpr int lane_width_in_bytes = 4; - Simd128Register dst = i.OutputSimd128Register(); - if (CpuFeatures::IsSupported(PPC_10_PLUS)) { - __ vinsw(dst, i.InputRegister(2), - Operand((3 - i.InputInt8(1)) * lane_width_in_bytes)); - } else { - __ mtvsrd(kScratchSimd128Reg, i.InputRegister(2)); - __ vinsertw(dst, kScratchSimd128Reg, - Operand((3 - i.InputInt8(1)) * lane_width_in_bytes)); - } + __ I32x4ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputRegister(2), i.InputInt8(1), + kScratchSimd128Reg); break; } case kPPC_I16x8ReplaceLane: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - constexpr int lane_width_in_bytes = 2; - Simd128Register dst = i.OutputSimd128Register(); - __ mtvsrd(kScratchSimd128Reg, i.InputRegister(2)); - __ vinserth(dst, kScratchSimd128Reg, - Operand((7 - i.InputInt8(1)) * lane_width_in_bytes)); + __ 
I16x8ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputRegister(2), i.InputInt8(1), + kScratchSimd128Reg); break; } case kPPC_I8x16ReplaceLane: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); - Simd128Register dst = i.OutputSimd128Register(); - __ mtvsrd(kScratchSimd128Reg, i.InputRegister(2)); - __ vinsertb(dst, kScratchSimd128Reg, Operand(15 - i.InputInt8(1))); + __ I8x16ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputRegister(2), i.InputInt8(1), + kScratchSimd128Reg); break; } case kPPC_F64x2Add: { diff --git a/src/wasm/baseline/ppc/liftoff-assembler-ppc.h b/src/wasm/baseline/ppc/liftoff-assembler-ppc.h index 2dec6ec856..8d8c4745e8 100644 --- a/src/wasm/baseline/ppc/liftoff-assembler-ppc.h +++ b/src/wasm/baseline/ppc/liftoff-assembler-ppc.h @@ -1826,6 +1826,54 @@ void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst, kScratchSimd128Reg); } +void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst, + LiftoffRegister src1, + LiftoffRegister src2, + uint8_t imm_lane_idx) { + F64x2ReplaceLane(dst.fp().toSimd(), src1.fp().toSimd(), src2.fp(), + imm_lane_idx, r0, kScratchSimd128Reg); +} + +void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst, + LiftoffRegister src1, + LiftoffRegister src2, + uint8_t imm_lane_idx) { + F32x4ReplaceLane(dst.fp().toSimd(), src1.fp().toSimd(), src2.fp(), + imm_lane_idx, r0, kScratchDoubleReg, kScratchSimd128Reg); +} + +void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst, + LiftoffRegister src1, + LiftoffRegister src2, + uint8_t imm_lane_idx) { + I64x2ReplaceLane(dst.fp().toSimd(), src1.fp().toSimd(), src2.gp(), + imm_lane_idx, kScratchSimd128Reg); +} + +void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst, + LiftoffRegister src1, + LiftoffRegister src2, + uint8_t imm_lane_idx) { + I32x4ReplaceLane(dst.fp().toSimd(), src1.fp().toSimd(), src2.gp(), + imm_lane_idx, kScratchSimd128Reg); +} + +void 
LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst, + LiftoffRegister src1, + LiftoffRegister src2, + uint8_t imm_lane_idx) { + I16x8ReplaceLane(dst.fp().toSimd(), src1.fp().toSimd(), src2.gp(), + imm_lane_idx, kScratchSimd128Reg); +} + +void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst, + LiftoffRegister src1, + LiftoffRegister src2, + uint8_t imm_lane_idx) { + I8x16ReplaceLane(dst.fp().toSimd(), src1.fp().toSimd(), src2.gp(), + imm_lane_idx, kScratchSimd128Reg); +} + void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, Register offset_reg, uintptr_t offset_imm, LoadType type, @@ -1895,13 +1943,6 @@ void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst, bailout(kRelaxedSimd, "emit_s128_relaxed_laneselect"); } -void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst, - LiftoffRegister src1, - LiftoffRegister src2, - uint8_t imm_lane_idx) { - bailout(kUnsupportedArchitecture, "emit_f64x2replacelane"); -} - void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst, LiftoffRegister src) { bailout(kUnsupportedArchitecture, "emit_f64x2_abs"); @@ -2008,13 +2049,6 @@ void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst, bailout(kSimd, "f64x2.promote_low_f32x4"); } -void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst, - LiftoffRegister src1, - LiftoffRegister src2, - uint8_t imm_lane_idx) { - bailout(kUnsupportedArchitecture, "emit_f32x4replacelane"); -} - void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst, LiftoffRegister src) { bailout(kUnsupportedArchitecture, "emit_f32x4_abs"); @@ -2106,13 +2140,6 @@ void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs, bailout(kSimd, "pmax unimplemented"); } -void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst, - LiftoffRegister src1, - LiftoffRegister src2, - uint8_t imm_lane_idx) { - bailout(kUnsupportedArchitecture, "emit_i64x2replacelane"); -} - void 
LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst, LiftoffRegister src) { bailout(kUnsupportedArchitecture, "emit_i64x2neg"); @@ -2219,13 +2246,6 @@ void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst, bailout(kSimd, "i64x2_extmul_high_i32x4_u unsupported"); } -void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst, - LiftoffRegister src1, - LiftoffRegister src2, - uint8_t imm_lane_idx) { - bailout(kUnsupportedArchitecture, "emit_i32x4replacelane"); -} - void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, LiftoffRegister src) { bailout(kUnsupportedArchitecture, "emit_i32x4neg"); @@ -2462,13 +2482,6 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst, bailout(kUnsupportedArchitecture, "emit_i16x8_max_u"); } -void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst, - LiftoffRegister src1, - LiftoffRegister src2, - uint8_t imm_lane_idx) { - bailout(kUnsupportedArchitecture, "emit_i16x8replacelane"); -} - void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst, LiftoffRegister src) { bailout(kSimd, "i16x8.extadd_pairwise_i8x16_s"); @@ -2541,13 +2554,6 @@ void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst, bailout(kSimd, "i8x16.popcnt"); } -void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst, - LiftoffRegister src1, - LiftoffRegister src2, - uint8_t imm_lane_idx) { - bailout(kUnsupportedArchitecture, "emit_i8x16replacelane"); -} - void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, LiftoffRegister src) { bailout(kUnsupportedArchitecture, "emit_i8x16neg");