PPC[liftoff]: Implement simd load lane
Drive-by: Simd128 load and store ops are also grouped within a macro.

Change-Id: I7bfefb858472a1dfa6ed7e0615114b57739b1a85
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4193366
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Reviewed-by: Junliang Yan <junyan@redhat.com>
Cr-Commit-Position: refs/heads/main@{#85507}
This commit is contained in:
parent
9b305c3322
commit
fa303fcd0b
@ -3572,6 +3572,23 @@ MEM_OP_PREFIXED_LIST(MEM_OP_PREFIXED_FUNCTION)
|
||||
#undef MEM_OP_PREFIXED_LIST
|
||||
#undef MEM_OP_PREFIXED_FUNCTION
|
||||
|
||||
#define MEM_OP_SIMD_LIST(V) \
|
||||
V(LoadSimd128, lxvx) \
|
||||
V(StoreSimd128, stxvx) \
|
||||
V(LoadSimd128Uint64, lxsdx) \
|
||||
V(LoadSimd128Uint32, lxsiwzx) \
|
||||
V(LoadSimd128Uint16, lxsihzx) \
|
||||
V(LoadSimd128Uint8, lxsibzx)
|
||||
|
||||
#define MEM_OP_SIMD_FUNCTION(name, rr_op) \
|
||||
void TurboAssembler::name(Simd128Register reg, const MemOperand& mem, \
|
||||
Register scratch) { \
|
||||
GenerateMemoryOperationRR(reg, mem, rr_op); \
|
||||
}
|
||||
MEM_OP_SIMD_LIST(MEM_OP_SIMD_FUNCTION)
|
||||
#undef MEM_OP_SIMD_LIST
|
||||
#undef MEM_OP_SIMD_FUNCTION
|
||||
|
||||
void TurboAssembler::LoadS8(Register dst, const MemOperand& mem,
|
||||
Register scratch) {
|
||||
LoadU8(dst, mem, scratch);
|
||||
@ -3939,16 +3956,6 @@ void TurboAssembler::I64x2ExtMulHighI32x4U(Simd128Register dst,
|
||||
}
|
||||
#undef EXT_MUL
|
||||
|
||||
// Loads a full 128-bit vector from |mem| into |dst| via lxvx.
// |scratch| may be consumed by GenerateMemoryOperationRR to form the
// register-register address — presumably when the offset is non-zero;
// confirm against that helper's definition.
void TurboAssembler::LoadSimd128(Simd128Register dst, const MemOperand& mem,
                                 Register scratch) {
  GenerateMemoryOperationRR(dst, mem, lxvx);
}
|
||||
|
||||
// Stores the full 128-bit vector |src| to |mem| via stxvx.
// |scratch| may be consumed by GenerateMemoryOperationRR to form the
// register-register address — presumably when the offset is non-zero;
// confirm against that helper's definition.
void TurboAssembler::StoreSimd128(Simd128Register src, const MemOperand& mem,
                                  Register scratch) {
  GenerateMemoryOperationRR(src, mem, stxvx);
}
|
||||
|
||||
void TurboAssembler::LoadSimd128LE(Simd128Register dst, const MemOperand& mem,
|
||||
Register scratch) {
|
||||
#ifdef V8_TARGET_BIG_ENDIAN
|
||||
@ -4637,6 +4644,43 @@ void TurboAssembler::I32x4TruncSatF64x2UZero(Simd128Register dst,
|
||||
vinsertd(dst, scratch, Operand(lane_number));
|
||||
}
|
||||
|
||||
// On big-endian targets the element loaded from memory must be
// byte-reversed in-register before lane insertion; on little-endian
// targets the macro expands to nothing.
#if V8_TARGET_BIG_ENDIAN
#define MAYBE_REVERSE_BYTES(reg, instr) instr(reg, reg);
#else
#define MAYBE_REVERSE_BYTES(reg, instr)
#endif
|
||||
// Replaces 64-bit lane |lane| of |dst| with the doubleword at |mem|,
// using little-endian lane numbering. |scratch1| helps form the address;
// |scratch2| holds the loaded element before insertion.
void TurboAssembler::LoadLane64LE(Simd128Register dst, const MemOperand& mem,
                                  int lane, Register scratch1,
                                  Simd128Register scratch2) {
  constexpr int lane_width_in_bytes = 8;
  LoadSimd128Uint64(scratch2, mem, scratch1);
  MAYBE_REVERSE_BYTES(scratch2, xxbrd)
  // vinsertd takes a byte offset; lanes are counted from the opposite end,
  // hence the (1 - lane) mirroring for 2 lanes of 8 bytes each.
  vinsertd(dst, scratch2, Operand((1 - lane) * lane_width_in_bytes));
}
|
||||
// Replaces 32-bit lane |lane| of |dst| with the word at |mem|, using
// little-endian lane numbering. |scratch1| helps form the address;
// |scratch2| holds the loaded element before insertion.
void TurboAssembler::LoadLane32LE(Simd128Register dst, const MemOperand& mem,
                                  int lane, Register scratch1,
                                  Simd128Register scratch2) {
  constexpr int lane_width_in_bytes = 4;
  LoadSimd128Uint32(scratch2, mem, scratch1);
  MAYBE_REVERSE_BYTES(scratch2, xxbrw)
  // Byte offset mirrored across the 4 word lanes: (3 - lane) * 4.
  vinsertw(dst, scratch2, Operand((3 - lane) * lane_width_in_bytes));
}
|
||||
// Replaces 16-bit lane |lane| of |dst| with the halfword at |mem|, using
// little-endian lane numbering. |scratch1| helps form the address;
// |scratch2| holds the loaded element before insertion.
void TurboAssembler::LoadLane16LE(Simd128Register dst, const MemOperand& mem,
                                  int lane, Register scratch1,
                                  Simd128Register scratch2) {
  constexpr int lane_width_in_bytes = 2;
  LoadSimd128Uint16(scratch2, mem, scratch1);
  MAYBE_REVERSE_BYTES(scratch2, xxbrh)
  // Byte offset mirrored across the 8 halfword lanes: (7 - lane) * 2.
  vinserth(dst, scratch2, Operand((7 - lane) * lane_width_in_bytes));
}
|
||||
// Replaces 8-bit lane |lane| of |dst| with the byte at |mem|, using
// little-endian lane numbering. A single byte needs no byte-reversal, so
// no MAYBE_REVERSE_BYTES here. |scratch1| helps form the address;
// |scratch2| holds the loaded byte before insertion.
void TurboAssembler::LoadLane8LE(Simd128Register dst, const MemOperand& mem,
                                 int lane, Register scratch1,
                                 Simd128Register scratch2) {
  LoadSimd128Uint8(scratch2, mem, scratch1);
  // Lane width is 1 byte, so the mirrored byte offset is simply 15 - lane.
  vinsertb(dst, scratch2, Operand(15 - lane));
}
|
||||
#undef MAYBE_REVERSE_BYTES
|
||||
|
||||
void TurboAssembler::V128AnyTrue(Register dst, Simd128Register src,
|
||||
Register scratch1, Register scratch2,
|
||||
Simd128Register scratch3) {
|
||||
|
@ -1344,6 +1344,22 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
Register scratch);
|
||||
void StoreSimd128LE(Simd128Register src, const MemOperand& mem,
|
||||
Register scratch1, Simd128Register scratch2);
|
||||
void LoadSimd128Uint64(Simd128Register reg, const MemOperand& mem,
|
||||
Register scratch);
|
||||
void LoadSimd128Uint32(Simd128Register reg, const MemOperand& mem,
|
||||
Register scratch);
|
||||
void LoadSimd128Uint16(Simd128Register reg, const MemOperand& mem,
|
||||
Register scratch);
|
||||
void LoadSimd128Uint8(Simd128Register reg, const MemOperand& mem,
|
||||
Register scratch);
|
||||
void LoadLane64LE(Simd128Register dst, const MemOperand& mem, int lane,
|
||||
Register scratch1, Simd128Register scratch2);
|
||||
void LoadLane32LE(Simd128Register dst, const MemOperand& mem, int lane,
|
||||
Register scratch1, Simd128Register scratch2);
|
||||
void LoadLane16LE(Simd128Register dst, const MemOperand& mem, int lane,
|
||||
Register scratch1, Simd128Register scratch2);
|
||||
void LoadLane8LE(Simd128Register dst, const MemOperand& mem, int lane,
|
||||
Register scratch1, Simd128Register scratch2);
|
||||
void F64x2Splat(Simd128Register dst, DoubleRegister src, Register scratch);
|
||||
void F32x4Splat(Simd128Register dst, DoubleRegister src,
|
||||
DoubleRegister scratch1, Register scratch2);
|
||||
|
@ -2453,6 +2453,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
#undef EMIT_SIMD_EXT_ADD_PAIRWISE
|
||||
#undef SIMD_EXT_ADD_PAIRWISE_LIST
|
||||
|
||||
#define SIMD_LOAD_LANE_LIST(V) \
|
||||
V(S128Load64Lane, LoadLane64LE) \
|
||||
V(S128Load32Lane, LoadLane32LE) \
|
||||
V(S128Load16Lane, LoadLane16LE) \
|
||||
V(S128Load8Lane, LoadLane8LE)
|
||||
|
||||
#define EMIT_SIMD_LOAD_LANE(name, op) \
|
||||
case kPPC_##name: { \
|
||||
Simd128Register dst = i.OutputSimd128Register(); \
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0)); \
|
||||
AddressingMode mode = kMode_None; \
|
||||
size_t index = 1; \
|
||||
MemOperand operand = i.MemoryOperand(&mode, &index); \
|
||||
DCHECK_EQ(mode, kMode_MRR); \
|
||||
__ op(dst, operand, i.InputUint8(3), kScratchReg, kScratchSimd128Reg); \
|
||||
break; \
|
||||
}
|
||||
SIMD_LOAD_LANE_LIST(EMIT_SIMD_LOAD_LANE)
|
||||
#undef EMIT_SIMD_LOAD_LANE
|
||||
#undef SIMD_LOAD_LANE_LIST
|
||||
|
||||
case kPPC_F64x2Splat: {
|
||||
__ F64x2Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0),
|
||||
kScratchReg);
|
||||
@ -2795,59 +2816,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
}
|
||||
#undef ASSEMBLE_LOAD_TRANSFORM
|
||||
case kPPC_S128Load8Lane: {
|
||||
Simd128Register dst = i.OutputSimd128Register();
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
||||
AddressingMode mode = kMode_None;
|
||||
size_t index = 1;
|
||||
MemOperand operand = i.MemoryOperand(&mode, &index);
|
||||
DCHECK_EQ(mode, kMode_MRR);
|
||||
__ lxsibzx(kScratchSimd128Reg, operand);
|
||||
__ vinsertb(dst, kScratchSimd128Reg, Operand(15 - i.InputUint8(3)));
|
||||
break;
|
||||
}
|
||||
case kPPC_S128Load16Lane: {
|
||||
Simd128Register dst = i.OutputSimd128Register();
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
||||
constexpr int lane_width_in_bytes = 2;
|
||||
AddressingMode mode = kMode_None;
|
||||
size_t index = 1;
|
||||
MemOperand operand = i.MemoryOperand(&mode, &index);
|
||||
DCHECK_EQ(mode, kMode_MRR);
|
||||
__ lxsihzx(kScratchSimd128Reg, operand);
|
||||
MAYBE_REVERSE_BYTES(kScratchSimd128Reg, xxbrh)
|
||||
__ vinserth(dst, kScratchSimd128Reg,
|
||||
Operand((7 - i.InputUint8(3)) * lane_width_in_bytes));
|
||||
break;
|
||||
}
|
||||
case kPPC_S128Load32Lane: {
|
||||
Simd128Register dst = i.OutputSimd128Register();
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
||||
constexpr int lane_width_in_bytes = 4;
|
||||
AddressingMode mode = kMode_None;
|
||||
size_t index = 1;
|
||||
MemOperand operand = i.MemoryOperand(&mode, &index);
|
||||
DCHECK_EQ(mode, kMode_MRR);
|
||||
__ lxsiwzx(kScratchSimd128Reg, operand);
|
||||
MAYBE_REVERSE_BYTES(kScratchSimd128Reg, xxbrw)
|
||||
__ vinsertw(dst, kScratchSimd128Reg,
|
||||
Operand((3 - i.InputUint8(3)) * lane_width_in_bytes));
|
||||
break;
|
||||
}
|
||||
case kPPC_S128Load64Lane: {
|
||||
Simd128Register dst = i.OutputSimd128Register();
|
||||
DCHECK_EQ(dst, i.InputSimd128Register(0));
|
||||
constexpr int lane_width_in_bytes = 8;
|
||||
AddressingMode mode = kMode_None;
|
||||
size_t index = 1;
|
||||
MemOperand operand = i.MemoryOperand(&mode, &index);
|
||||
DCHECK_EQ(mode, kMode_MRR);
|
||||
__ lxsdx(kScratchSimd128Reg, operand);
|
||||
MAYBE_REVERSE_BYTES(kScratchSimd128Reg, xxbrd)
|
||||
__ vinsertd(dst, kScratchSimd128Reg,
|
||||
Operand((1 - i.InputUint8(3)) * lane_width_in_bytes));
|
||||
break;
|
||||
}
|
||||
case kPPC_S128Store8Lane: {
|
||||
AddressingMode mode = kMode_None;
|
||||
size_t index = 1;
|
||||
|
@ -2293,7 +2293,24 @@ void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
|
||||
Register addr, Register offset_reg,
|
||||
uintptr_t offset_imm, LoadType type,
|
||||
uint8_t laneidx, uint32_t* protected_load_pc) {
|
||||
bailout(kSimd, "loadlane");
|
||||
MemOperand src_op = MemOperand(addr, offset_reg, offset_imm);
|
||||
|
||||
MachineType mem_type = type.mem_type();
|
||||
if (dst != src) {
|
||||
vor(dst.fp().toSimd(), src.fp().toSimd(), src.fp().toSimd());
|
||||
}
|
||||
|
||||
if (protected_load_pc) *protected_load_pc = pc_offset();
|
||||
if (mem_type == MachineType::Int8()) {
|
||||
LoadLane8LE(dst.fp().toSimd(), src_op, laneidx, ip, kScratchSimd128Reg);
|
||||
} else if (mem_type == MachineType::Int16()) {
|
||||
LoadLane16LE(dst.fp().toSimd(), src_op, laneidx, ip, kScratchSimd128Reg);
|
||||
} else if (mem_type == MachineType::Int32()) {
|
||||
LoadLane32LE(dst.fp().toSimd(), src_op, laneidx, ip, kScratchSimd128Reg);
|
||||
} else {
|
||||
DCHECK_EQ(MachineType::Int64(), mem_type);
|
||||
LoadLane64LE(dst.fp().toSimd(), src_op, laneidx, ip, kScratchSimd128Reg);
|
||||
}
|
||||
}
|
||||
|
||||
void LiftoffAssembler::StoreLane(Register dst, Register offset,
|
||||
|
Loading…
Reference in New Issue
Block a user