PPC[liftoff]: Implement simd load lane

Drive-by: Simd128 load and store ops are also grouped
within a macro.

Change-Id: I7bfefb858472a1dfa6ed7e0615114b57739b1a85
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4193366
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Reviewed-by: Junliang Yan <junyan@redhat.com>
Cr-Commit-Position: refs/heads/main@{#85507}
This commit is contained in:
Milad Fa 2023-01-25 19:51:19 +00:00 committed by V8 LUCI CQ
parent 9b305c3322
commit fa303fcd0b
4 changed files with 109 additions and 64 deletions

View File

@ -3572,6 +3572,23 @@ MEM_OP_PREFIXED_LIST(MEM_OP_PREFIXED_FUNCTION)
#undef MEM_OP_PREFIXED_LIST
#undef MEM_OP_PREFIXED_FUNCTION
// Simd128 memory helpers, described as (TurboAssembler method name,
// register-register instruction) pairs. The first two entries are the full
// 128-bit load/store; the remaining entries are the loads used by the
// LoadLane*LE helpers.
#define MEM_OP_SIMD_LIST(V) \
V(LoadSimd128, lxvx) \
V(StoreSimd128, stxvx) \
V(LoadSimd128Uint64, lxsdx) \
V(LoadSimd128Uint32, lxsiwzx) \
V(LoadSimd128Uint16, lxsihzx) \
V(LoadSimd128Uint8, lxsibzx)
// Defines TurboAssembler::<name>, which hands its register, memory operand
// and <rr_op> to GenerateMemoryOperationRR.
// NOTE(review): |scratch| is never named in the generated body; presumably
// GenerateMemoryOperationRR picks it up by name -- confirm before renaming
// the parameter.
#define MEM_OP_SIMD_FUNCTION(name, rr_op) \
void TurboAssembler::name(Simd128Register reg, const MemOperand& mem, \
Register scratch) { \
GenerateMemoryOperationRR(reg, mem, rr_op); \
}
MEM_OP_SIMD_LIST(MEM_OP_SIMD_FUNCTION)
#undef MEM_OP_SIMD_LIST
#undef MEM_OP_SIMD_FUNCTION
void TurboAssembler::LoadS8(Register dst, const MemOperand& mem,
Register scratch) {
LoadU8(dst, mem, scratch);
@ -3939,16 +3956,6 @@ void TurboAssembler::I64x2ExtMulHighI32x4U(Simd128Register dst,
}
#undef EXT_MUL
// Emits an lxvx load of |mem| into |dst| through GenerateMemoryOperationRR.
// NOTE(review): MEM_OP_SIMD_LIST also generates TurboAssembler::LoadSimd128
// with the same instruction; this hand-written copy looks like the
// pre-refactoring definition -- confirm that only one of them remains.
void TurboAssembler::LoadSimd128(Simd128Register dst, const MemOperand& mem,
Register scratch) {
GenerateMemoryOperationRR(dst, mem, lxvx);
}
// Emits an stxvx store of |src| to |mem| through GenerateMemoryOperationRR.
// NOTE(review): MEM_OP_SIMD_LIST also generates TurboAssembler::StoreSimd128
// with the same instruction; this hand-written copy looks like the
// pre-refactoring definition -- confirm that only one of them remains.
void TurboAssembler::StoreSimd128(Simd128Register src, const MemOperand& mem,
Register scratch) {
GenerateMemoryOperationRR(src, mem, stxvx);
}
void TurboAssembler::LoadSimd128LE(Simd128Register dst, const MemOperand& mem,
Register scratch) {
#ifdef V8_TARGET_BIG_ENDIAN
@ -4637,6 +4644,43 @@ void TurboAssembler::I32x4TruncSatF64x2UZero(Simd128Register dst,
vinsertd(dst, scratch, Operand(lane_number));
}
// On big-endian targets, expands to `instr(reg, reg);` (an in-place byte
// reversal when given xxbrd/xxbrw/xxbrh); on other targets it expands to
// nothing. The expansion carries its own trailing ';', so call sites are
// written without one.
#if V8_TARGET_BIG_ENDIAN
#define MAYBE_REVERSE_BYTES(reg, instr) instr(reg, reg);
#else
#define MAYBE_REVERSE_BYTES(reg, instr)
#endif
// Loads a 64-bit value from |mem| into |scratch2| (byte-reversed on
// big-endian targets) and inserts it into |dst| with vinsertd. The insertion
// byte index counts down from the last lane: (1 - lane) * 8.
void TurboAssembler::LoadLane64LE(Simd128Register dst, const MemOperand& mem,
                                  int lane, Register scratch1,
                                  Simd128Register scratch2) {
  constexpr int kBytesPerLane = 8;
  constexpr int kHighestLane = 1;
  LoadSimd128Uint64(scratch2, mem, scratch1);
  MAYBE_REVERSE_BYTES(scratch2, xxbrd)
  const int insert_byte_index = (kHighestLane - lane) * kBytesPerLane;
  vinsertd(dst, scratch2, Operand(insert_byte_index));
}
// Loads a 32-bit value from |mem| into |scratch2| (byte-reversed on
// big-endian targets) and inserts it into |dst| with vinsertw. The insertion
// byte index counts down from the last lane: (3 - lane) * 4.
void TurboAssembler::LoadLane32LE(Simd128Register dst, const MemOperand& mem,
                                  int lane, Register scratch1,
                                  Simd128Register scratch2) {
  constexpr int kBytesPerLane = 4;
  constexpr int kHighestLane = 3;
  LoadSimd128Uint32(scratch2, mem, scratch1);
  MAYBE_REVERSE_BYTES(scratch2, xxbrw)
  const int insert_byte_index = (kHighestLane - lane) * kBytesPerLane;
  vinsertw(dst, scratch2, Operand(insert_byte_index));
}
// Loads a 16-bit value from |mem| into |scratch2| (byte-reversed on
// big-endian targets) and inserts it into |dst| with vinserth. The insertion
// byte index counts down from the last lane: (7 - lane) * 2.
void TurboAssembler::LoadLane16LE(Simd128Register dst, const MemOperand& mem,
                                  int lane, Register scratch1,
                                  Simd128Register scratch2) {
  constexpr int kBytesPerLane = 2;
  constexpr int kHighestLane = 7;
  LoadSimd128Uint16(scratch2, mem, scratch1);
  MAYBE_REVERSE_BYTES(scratch2, xxbrh)
  const int insert_byte_index = (kHighestLane - lane) * kBytesPerLane;
  vinserth(dst, scratch2, Operand(insert_byte_index));
}
// Loads a single byte from |mem| into |scratch2| and inserts it into |dst|
// with vinsertb at byte index 15 - lane. No byte reversal is applied for a
// one-byte value.
void TurboAssembler::LoadLane8LE(Simd128Register dst, const MemOperand& mem,
                                 int lane, Register scratch1,
                                 Simd128Register scratch2) {
  constexpr int kHighestLane = 15;
  LoadSimd128Uint8(scratch2, mem, scratch1);
  const int insert_byte_index = kHighestLane - lane;
  vinsertb(dst, scratch2, Operand(insert_byte_index));
}
#undef MAYBE_REVERSE_BYTES
void TurboAssembler::V128AnyTrue(Register dst, Simd128Register src,
Register scratch1, Register scratch2,
Simd128Register scratch3) {

View File

@ -1344,6 +1344,22 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
Register scratch);
void StoreSimd128LE(Simd128Register src, const MemOperand& mem,
Register scratch1, Simd128Register scratch2);
// Load a 64/32/16/8-bit value from |mem| into |reg|. The definitions are
// generated from MEM_OP_SIMD_LIST and emit lxsdx, lxsiwzx, lxsihzx and
// lxsibzx respectively via GenerateMemoryOperationRR.
// NOTE(review): |scratch| is presumably used when the address has to be
// materialized first -- confirm against GenerateMemoryOperationRR.
void LoadSimd128Uint64(Simd128Register reg, const MemOperand& mem,
Register scratch);
void LoadSimd128Uint32(Simd128Register reg, const MemOperand& mem,
Register scratch);
void LoadSimd128Uint16(Simd128Register reg, const MemOperand& mem,
Register scratch);
void LoadSimd128Uint8(Simd128Register reg, const MemOperand& mem,
Register scratch);
// Load one 64/32/16/8-bit value from |mem| and insert it into lane |lane| of
// |dst|. The value is first loaded into |scratch2| (byte-reversed on
// big-endian targets for the multi-byte widths) and then inserted with
// vinsertd/vinsertw/vinserth/vinsertb at byte index
// (last_lane - lane) * lane_width_in_bytes. |scratch1| is forwarded to the
// underlying LoadSimd128Uint* helper.
void LoadLane64LE(Simd128Register dst, const MemOperand& mem, int lane,
Register scratch1, Simd128Register scratch2);
void LoadLane32LE(Simd128Register dst, const MemOperand& mem, int lane,
Register scratch1, Simd128Register scratch2);
void LoadLane16LE(Simd128Register dst, const MemOperand& mem, int lane,
Register scratch1, Simd128Register scratch2);
void LoadLane8LE(Simd128Register dst, const MemOperand& mem, int lane,
Register scratch1, Simd128Register scratch2);
void F64x2Splat(Simd128Register dst, DoubleRegister src, Register scratch);
void F32x4Splat(Simd128Register dst, DoubleRegister src,
DoubleRegister scratch1, Register scratch2);

View File

@ -2453,6 +2453,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
#undef EMIT_SIMD_EXT_ADD_PAIRWISE
#undef SIMD_EXT_ADD_PAIRWISE_LIST
// SIMD load-lane opcodes (without the kPPC_ prefix) paired with the
// TurboAssembler helper that emits each of them.
#define SIMD_LOAD_LANE_LIST(V) \
V(S128Load64Lane, LoadLane64LE) \
V(S128Load32Lane, LoadLane32LE) \
V(S128Load16Lane, LoadLane16LE) \
V(S128Load8Lane, LoadLane8LE)
// Expands to one `case kPPC_<name>` per list entry. Each case:
//  - requires the output register to be the same as Simd128 input 0,
//  - decodes the memory operand from the inputs starting at index 1 and
//    requires the addressing mode to be kMode_MRR,
//  - reads the lane index from input 3 as a uint8,
//  - calls <op> with kScratchReg / kScratchSimd128Reg as temporaries.
#define EMIT_SIMD_LOAD_LANE(name, op) \
case kPPC_##name: { \
Simd128Register dst = i.OutputSimd128Register(); \
DCHECK_EQ(dst, i.InputSimd128Register(0)); \
AddressingMode mode = kMode_None; \
size_t index = 1; \
MemOperand operand = i.MemoryOperand(&mode, &index); \
DCHECK_EQ(mode, kMode_MRR); \
__ op(dst, operand, i.InputUint8(3), kScratchReg, kScratchSimd128Reg); \
break; \
}
SIMD_LOAD_LANE_LIST(EMIT_SIMD_LOAD_LANE)
#undef EMIT_SIMD_LOAD_LANE
#undef SIMD_LOAD_LANE_LIST
case kPPC_F64x2Splat: {
__ F64x2Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0),
kScratchReg);
@ -2795,59 +2816,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
#undef ASSEMBLE_LOAD_TRANSFORM
case kPPC_S128Load8Lane: {
  // Load one byte into the scratch vector and insert it into the result,
  // which must be the same register as Simd128 input 0.
  Simd128Register result = i.OutputSimd128Register();
  DCHECK_EQ(result, i.InputSimd128Register(0));
  size_t first_mem_input = 1;
  AddressingMode addr_mode = kMode_None;
  MemOperand mem = i.MemoryOperand(&addr_mode, &first_mem_input);
  DCHECK_EQ(addr_mode, kMode_MRR);
  __ lxsibzx(kScratchSimd128Reg, mem);
  // Input 3 holds the lane index; the insertion index counts down from 15.
  const int insert_byte_index = 15 - i.InputUint8(3);
  __ vinsertb(result, kScratchSimd128Reg, Operand(insert_byte_index));
  break;
}
case kPPC_S128Load16Lane: {
  // Load a halfword into the scratch vector (byte-reversed on big-endian
  // targets) and insert it into the result, which must be the same register
  // as Simd128 input 0.
  constexpr int kBytesPerLane = 2;
  Simd128Register result = i.OutputSimd128Register();
  DCHECK_EQ(result, i.InputSimd128Register(0));
  size_t first_mem_input = 1;
  AddressingMode addr_mode = kMode_None;
  MemOperand mem = i.MemoryOperand(&addr_mode, &first_mem_input);
  DCHECK_EQ(addr_mode, kMode_MRR);
  __ lxsihzx(kScratchSimd128Reg, mem);
  MAYBE_REVERSE_BYTES(kScratchSimd128Reg, xxbrh)
  // Input 3 holds the lane index; lanes count down from 7.
  const int insert_byte_index = (7 - i.InputUint8(3)) * kBytesPerLane;
  __ vinserth(result, kScratchSimd128Reg, Operand(insert_byte_index));
  break;
}
case kPPC_S128Load32Lane: {
  // Load a word into the scratch vector (byte-reversed on big-endian
  // targets) and insert it into the result, which must be the same register
  // as Simd128 input 0.
  constexpr int kBytesPerLane = 4;
  Simd128Register result = i.OutputSimd128Register();
  DCHECK_EQ(result, i.InputSimd128Register(0));
  size_t first_mem_input = 1;
  AddressingMode addr_mode = kMode_None;
  MemOperand mem = i.MemoryOperand(&addr_mode, &first_mem_input);
  DCHECK_EQ(addr_mode, kMode_MRR);
  __ lxsiwzx(kScratchSimd128Reg, mem);
  MAYBE_REVERSE_BYTES(kScratchSimd128Reg, xxbrw)
  // Input 3 holds the lane index; lanes count down from 3.
  const int insert_byte_index = (3 - i.InputUint8(3)) * kBytesPerLane;
  __ vinsertw(result, kScratchSimd128Reg, Operand(insert_byte_index));
  break;
}
case kPPC_S128Load64Lane: {
  // Load a doubleword into the scratch vector (byte-reversed on big-endian
  // targets) and insert it into the result, which must be the same register
  // as Simd128 input 0.
  constexpr int kBytesPerLane = 8;
  Simd128Register result = i.OutputSimd128Register();
  DCHECK_EQ(result, i.InputSimd128Register(0));
  size_t first_mem_input = 1;
  AddressingMode addr_mode = kMode_None;
  MemOperand mem = i.MemoryOperand(&addr_mode, &first_mem_input);
  DCHECK_EQ(addr_mode, kMode_MRR);
  __ lxsdx(kScratchSimd128Reg, mem);
  MAYBE_REVERSE_BYTES(kScratchSimd128Reg, xxbrd)
  // Input 3 holds the lane index; lanes count down from 1.
  const int insert_byte_index = (1 - i.InputUint8(3)) * kBytesPerLane;
  __ vinsertd(result, kScratchSimd128Reg, Operand(insert_byte_index));
  break;
}
case kPPC_S128Store8Lane: {
AddressingMode mode = kMode_None;
size_t index = 1;

View File

@ -2293,7 +2293,24 @@ void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
Register addr, Register offset_reg,
uintptr_t offset_imm, LoadType type,
uint8_t laneidx, uint32_t* protected_load_pc) {
// SIMD load-lane: copy |src| into |dst| if they differ, then load a value of
// |type|'s memory type from the operand built out of |addr|, |offset_reg| and
// |offset_imm| into lane |laneidx| of |dst|.
// NOTE(review): this bailout sits directly in front of a complete
// implementation; it looks like the pre-change line carried over from the
// diff -- confirm it is gone in the final file.
bailout(kSimd, "loadlane");
MemOperand src_op = MemOperand(addr, offset_reg, offset_imm);
MachineType mem_type = type.mem_type();
// OR-ing |src| with itself is a plain vector register copy into |dst|.
if (dst != src) {
vor(dst.fp().toSimd(), src.fp().toSimd(), src.fp().toSimd());
}
// Report the current code offset to the caller (when requested) before the
// load helper emits anything.
if (protected_load_pc) *protected_load_pc = pc_offset();
// Dispatch on the memory width; ip and kScratchSimd128Reg are passed as the
// helpers' scratch registers.
if (mem_type == MachineType::Int8()) {
LoadLane8LE(dst.fp().toSimd(), src_op, laneidx, ip, kScratchSimd128Reg);
} else if (mem_type == MachineType::Int16()) {
LoadLane16LE(dst.fp().toSimd(), src_op, laneidx, ip, kScratchSimd128Reg);
} else if (mem_type == MachineType::Int32()) {
LoadLane32LE(dst.fp().toSimd(), src_op, laneidx, ip, kScratchSimd128Reg);
} else {
// Only Int64 is expected to remain at this point.
DCHECK_EQ(MachineType::Int64(), mem_type);
LoadLane64LE(dst.fp().toSimd(), src_op, laneidx, ip, kScratchSimd128Reg);
}
}
void LiftoffAssembler::StoreLane(Register dst, Register offset,