PPC [simd]: refactor simd load/store in macro-asm

This CL refactors the simd load/store helpers to accept a scratch register,
which is used inside the macro-assembler.

LE-enforced versions of them are also introduced.

Change-Id: I97f4f4870d7889204b1d42cf50de85e234ecae36
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3765514
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#81757}
This commit is contained in:
Milad Fa 2022-07-15 15:02:38 -04:00 committed by V8 LUCI CQ
parent 043a5cac0a
commit 21e5dbdab2
3 changed files with 84 additions and 64 deletions

View File

@ -450,8 +450,7 @@ void TurboAssembler::MultiPushV128(Simd128RegList simd_regs,
if ((simd_regs.bits() & (1 << i)) != 0) {
Simd128Register simd_reg = Simd128Register::from_code(i);
stack_offset -= kSimd128Size;
li(ip, Operand(stack_offset));
StoreSimd128(simd_reg, MemOperand(location, ip));
StoreSimd128(simd_reg, MemOperand(location, stack_offset), ip);
}
}
}
@ -475,8 +474,7 @@ void TurboAssembler::MultiPopV128(Simd128RegList simd_regs, Register location) {
for (int16_t i = 0; i < Simd128Register::kNumRegisters; i++) {
if ((simd_regs.bits() & (1 << i)) != 0) {
Simd128Register simd_reg = Simd128Register::from_code(i);
li(ip, Operand(stack_offset));
LoadSimd128(simd_reg, MemOperand(location, ip));
LoadSimd128(simd_reg, MemOperand(location, stack_offset), ip);
stack_offset += kSimd128Size;
}
}
@ -3195,6 +3193,26 @@ void MacroAssembler::AndSmiLiteral(Register dst, Register src, Smi smi,
} \
}
// Emits the register-indexed memory instruction |op| for |reg| and |mem|.
// The expansion refers to a register named `scratch`, so one must be in scope
// wherever the macro is used. Three cases are handled:
//  - zero offset: |op| is emitted on |mem| as-is when it has an index register
//    (rb); otherwise the operand is rebuilt as MemOperand(r0, mem.ra()).
//    NOTE(review): presumably relies on r0 in the RA slot reading as zero for
//    indexed forms - confirm against the Power ISA.
//  - offset fits in int16: `scratch` receives rb + offset (addi) when rb is
//    present, or just the offset (mov), and is then used as the index register
//    next to mem.ra().
//  - any other offset: `scratch` receives the offset (mov), rb is added to it
//    when present, and `scratch` is used as the index register.
#define GenerateMemoryOperationRR(reg, mem, op) \
{ \
if (mem.offset() == 0) { \
if (mem.rb() != no_reg) \
op(reg, mem); \
else \
op(reg, MemOperand(r0, mem.ra())); \
} else if (is_int16(mem.offset())) { \
if (mem.rb() != no_reg) \
addi(scratch, mem.rb(), Operand(mem.offset())); \
else \
mov(scratch, Operand(mem.offset())); \
op(reg, MemOperand(mem.ra(), scratch)); \
} else { \
mov(scratch, Operand(mem.offset())); \
if (mem.rb() != no_reg) add(scratch, scratch, mem.rb()); \
op(reg, MemOperand(mem.ra(), scratch)); \
} \
}
#define GenerateMemoryOperationPrefixed(reg, mem, ri_op, rip_op, rr_op) \
{ \
int64_t offset = mem.offset(); \
@ -3359,36 +3377,6 @@ void TurboAssembler::LoadS8(Register dst, const MemOperand& mem,
extsb(dst, dst);
}
// Two-argument Simd128 load: emits a single lxvx for |mem|.
// |mem| must already carry a valid index register (rb); this is DCHECKed and
// no offset handling is performed here.
// NOTE(review): the parameter is named |src| although it is the register that
// lxvx is given as its first operand in a load - confirm the intended name.
void TurboAssembler::LoadSimd128(Simd128Register src, const MemOperand& mem) {
DCHECK(mem.rb().is_valid());
lxvx(src, mem);
}
// Two-argument Simd128 store: emits a single stxvx of |src| to |mem|.
// |mem| must already carry a valid index register (rb); this is DCHECKed and
// no offset handling is performed here.
void TurboAssembler::StoreSimd128(Simd128Register src, const MemOperand& mem) {
DCHECK(mem.rb().is_valid());
stxvx(src, mem);
}
// Emits the register-indexed memory instruction |op| for |reg| and |mem|.
// The expansion refers to a register named `scratch`, so one must be in scope
// wherever the macro is used. Three cases are handled:
//  - zero offset: |op| is emitted on |mem| as-is when it has an index register
//    (rb); otherwise the operand is rebuilt as MemOperand(r0, mem.ra()).
//  - offset fits in int16: `scratch` receives rb + offset (addi) when rb is
//    present, or just the offset (mov), and is then used as the index register
//    next to mem.ra().
//  - any other offset: `scratch` receives the offset (mov), rb is added to it
//    when present, and `scratch` is used as the index register.
#define GenerateMemoryLEOperation(reg, mem, op) \
{ \
if (mem.offset() == 0) { \
if (mem.rb() != no_reg) \
op(reg, mem); \
else \
op(reg, MemOperand(r0, mem.ra())); \
} else if (is_int16(mem.offset())) { \
if (mem.rb() != no_reg) \
addi(scratch, mem.rb(), Operand(mem.offset())); \
else \
mov(scratch, Operand(mem.offset())); \
op(reg, MemOperand(mem.ra(), scratch)); \
} else { \
mov(scratch, Operand(mem.offset())); \
if (mem.rb() != no_reg) add(scratch, scratch, mem.rb()); \
op(reg, MemOperand(mem.ra(), scratch)); \
} \
}
#define MEM_LE_OP_LIST(V) \
V(LoadU64, ldbrx) \
V(LoadU32, lwbrx) \
@ -3401,7 +3389,7 @@ void TurboAssembler::StoreSimd128(Simd128Register src, const MemOperand& mem) {
// Defines TurboAssembler::<name>LE(reg, mem, scratch) whose body emits the
// instruction |op| through a memory-operation generator macro.
// NOTE(review): the body below invokes both GenerateMemoryLEOperation and
// GenerateMemoryOperationRR with identical arguments; this looks like the
// before/after lines of a change shown together - confirm which one is meant
// to remain.
#define MEM_LE_OP_FUNCTION(name, op) \
void TurboAssembler::name##LE(Register reg, const MemOperand& mem, \
Register scratch) { \
GenerateMemoryLEOperation(reg, mem, op); \
GenerateMemoryOperationRR(reg, mem, op); \
}
#else
#define MEM_LE_OP_FUNCTION(name, op) \
@ -3481,6 +3469,38 @@ void TurboAssembler::StoreF32LE(DoubleRegister dst, const MemOperand& mem,
#endif
}
// Simd Support.
// Loads a Simd128 value from |mem| into |dst| with lxvx.
// The instruction is emitted through GenerateMemoryOperationRR, which picks up
// |scratch| by name and writes the operand's offset into it whenever the
// offset is non-zero.
void TurboAssembler::LoadSimd128(Simd128Register dst, const MemOperand& mem,
Register scratch) {
GenerateMemoryOperationRR(dst, mem, lxvx);
}
// Stores the Simd128 register |src| to |mem| with stxvx.
// The instruction is emitted through GenerateMemoryOperationRR, which picks up
// |scratch| by name and writes the operand's offset into it whenever the
// offset is non-zero.
void TurboAssembler::StoreSimd128(Simd128Register src, const MemOperand& mem,
Register scratch) {
GenerateMemoryOperationRR(src, mem, stxvx);
}
// LE-enforced Simd128 load: performs a regular LoadSimd128 of |mem| into |dst|
// (passing |scratch| through) and, on big-endian targets only, follows it with
// xxbrq on |dst|.
void TurboAssembler::LoadSimd128LE(Simd128Register dst, const MemOperand& mem,
                                   Register scratch) {
  // The load itself is the same on every target.
  LoadSimd128(dst, mem, scratch);
#ifdef V8_TARGET_BIG_ENDIAN
  // Only big-endian builds need the extra in-place xxbrq.
  xxbrq(dst, dst);
#endif
}
// LE-enforced Simd128 store of |src| to |mem|; |scratch1| is passed through to
// StoreSimd128. On big-endian targets xxbrq writes its result for |src| into
// |scratch2| and that register is stored instead, leaving |src| untouched.
// On other targets |src| is stored directly and |scratch2| is not used.
void TurboAssembler::StoreSimd128LE(Simd128Register src, const MemOperand& mem,
                                    Register scratch1,
                                    Simd128Register scratch2) {
#ifdef V8_TARGET_BIG_ENDIAN
  xxbrq(scratch2, src);
  const Simd128Register value = scratch2;
#else
  const Simd128Register value = src;
#endif
  StoreSimd128(value, mem, scratch1);
}
Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
Register reg4, Register reg5,
Register reg6) {
@ -3612,23 +3632,19 @@ void TurboAssembler::SwapSimd128(Simd128Register src, Simd128Register dst,
// Exchanges Simd128 register |src| with the value at memory operand |dst|:
// the memory value is loaded into |scratch|, |src| is stored to |dst|, and
// |scratch| is then copied into |src| (vor of |scratch| with itself).
// |src| and |scratch| must be different registers (DCHECKed). ip is used for
// address formation.
// NOTE(review): this body contains both a `mov(ip, offset)` + indexed-operand
// load/store pair and a three-argument LoadSimd128/StoreSimd128 pair doing the
// same work; it looks like before/after lines of a change shown together -
// confirm which pair is meant to remain.
void TurboAssembler::SwapSimd128(Simd128Register src, MemOperand dst,
Simd128Register scratch) {
DCHECK(src != scratch);
mov(ip, Operand(dst.offset()));
LoadSimd128(scratch, MemOperand(dst.ra(), ip));
StoreSimd128(src, MemOperand(dst.ra(), ip));
LoadSimd128(scratch, dst, ip);
StoreSimd128(src, dst, ip);
vor(src, scratch, scratch);
}
// Exchanges the Simd128 values at memory operands |src| and |dst|: both are
// loaded (|src| into |scratch1|, |dst| into |scratch2|) and then stored back
// crosswise. ip is used for address formation.
// NOTE(review): this body contains both `mov(ip, offset)` + indexed-operand
// loads/stores and three-argument LoadSimd128/StoreSimd128 calls covering the
// same work; it looks like before/after lines of a change shown together -
// confirm which set is meant to remain.
void TurboAssembler::SwapSimd128(MemOperand src, MemOperand dst,
Simd128Register scratch1,
Simd128Register scratch2) {
mov(ip, Operand(src.offset()));
LoadSimd128(scratch1, MemOperand(src.ra(), ip));
mov(ip, Operand(dst.offset()));
LoadSimd128(scratch2, MemOperand(dst.ra(), ip));
LoadSimd128(scratch1, src, ip);
LoadSimd128(scratch2, dst, ip);
StoreSimd128(scratch1, MemOperand(dst.ra(), ip));
mov(ip, Operand(src.offset()));
StoreSimd128(scratch2, MemOperand(src.ra(), ip));
StoreSimd128(scratch1, dst, ip);
StoreSimd128(scratch2, src, ip);
}
void TurboAssembler::ByteReverseU16(Register dst, Register val,

View File

@ -143,7 +143,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void LoadDoubleLiteral(DoubleRegister result, base::Double value,
Register scratch);
void LoadSimd128(Simd128Register dst, const MemOperand& mem);
// load a literal signed int value <value> to GPR <dst>
void LoadIntLiteral(Register dst, int value);
@ -1025,8 +1024,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void StoreF64WithUpdate(DoubleRegister src, const MemOperand& mem,
Register scratch = no_reg);
void StoreSimd128(Simd128Register src, const MemOperand& mem);
void LoadU64(Register dst, const MemOperand& mem, Register scratch = no_reg);
void LoadU32(Register dst, const MemOperand& mem, Register scratch = no_reg);
void LoadS32(Register dst, const MemOperand& mem, Register scratch = no_reg);
@ -1065,6 +1062,16 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void StoreF64LE(DoubleRegister src, const MemOperand& mem, Register scratch,
Register scratch2);
// Simd Support.
// Loads a Simd128 value from |mem| into |dst|; |scratch| is made available for
// forming the address.
void LoadSimd128(Simd128Register dst, const MemOperand& mem,
Register scratch);
// Stores Simd128 register |src| to |mem|; |scratch| is made available for
// forming the address.
void StoreSimd128(Simd128Register src, const MemOperand& mem,
Register scratch);
// LE-enforced variant of LoadSimd128.
void LoadSimd128LE(Simd128Register dst, const MemOperand& mem,
Register scratch);
// LE-enforced variant of StoreSimd128; takes an additional Simd128 scratch
// register |scratch2| besides the general-purpose |scratch1|.
void StoreSimd128LE(Simd128Register src, const MemOperand& mem,
Register scratch1, Simd128Register scratch2);
private:
static const int kSmiShift = kSmiTagSize + kSmiShiftSize;

View File

@ -1184,8 +1184,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ LoadF32(i.OutputFloatRegister(), MemOperand(fp, offset), r0);
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
__ mov(ip, Operand(offset));
__ LoadSimd128(i.OutputSimd128Register(), MemOperand(fp, ip));
__ LoadSimd128(i.OutputSimd128Register(), MemOperand(fp, offset),
kScratchReg);
}
} else {
__ LoadU64(i.OutputRegister(), MemOperand(fp, offset), r0);
@ -1701,7 +1701,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
case MachineRepresentation::kSimd128:
__ addi(sp, sp, Operand(-kSimd128Size));
__ StoreSimd128(i.InputSimd128Register(1), MemOperand(r0, sp));
__ StoreSimd128(i.InputSimd128Register(1), MemOperand(r0, sp),
kScratchReg);
break;
default:
__ StoreU64WithUpdate(i.InputRegister(1),
@ -1745,8 +1746,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
MemOperand(sp, slot * kSystemPointerSize), r0);
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
__ mov(ip, Operand(slot * kSystemPointerSize));
__ StoreSimd128(i.InputSimd128Register(0), MemOperand(ip, sp));
__ StoreSimd128(i.InputSimd128Register(0),
MemOperand(sp, slot * kSystemPointerSize),
kScratchReg);
}
} else {
__ StoreU64(i.InputRegister(0),
@ -2007,7 +2009,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
MemOperand operand = i.MemoryOperand(&mode);
bool is_atomic = i.InputInt32(2);
DCHECK_EQ(mode, kMode_MRR);
__ LoadSimd128(result, operand);
__ LoadSimd128(result, operand, kScratchReg);
if (is_atomic) __ lwsync();
DCHECK_EQ(LeaveRC, i.OutputRCBit());
break;
@ -2044,7 +2046,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
bool is_atomic = i.InputInt32(3);
if (is_atomic) __ lwsync();
DCHECK_EQ(mode, kMode_MRR);
__ StoreSimd128(value, operand);
__ StoreSimd128(value, operand, kScratchReg);
if (is_atomic) __ sync();
DCHECK_EQ(LeaveRC, i.OutputRCBit());
break;
@ -4486,8 +4488,7 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
} else {
DCHECK(destination->IsSimd128StackSlot());
MemOperand dst = g.ToMemOperand(destination);
__ mov(ip, Operand(dst.offset()));
__ StoreSimd128(g.ToSimd128Register(source), MemOperand(dst.ra(), ip));
__ StoreSimd128(g.ToSimd128Register(source), dst, kScratchReg);
}
} else {
DoubleRegister src = g.ToDoubleRegister(source);
@ -4516,9 +4517,7 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
MemOperand src = g.ToMemOperand(source);
__ mov(ip, Operand(src.offset()));
__ LoadSimd128(g.ToSimd128Register(destination),
MemOperand(src.ra(), ip));
__ LoadSimd128(g.ToSimd128Register(destination), src, kScratchReg);
}
} else {
LocationOperand* op = LocationOperand::cast(source);
@ -4533,10 +4532,8 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
MemOperand src = g.ToMemOperand(source);
MemOperand dst = g.ToMemOperand(destination);
__ mov(ip, Operand(src.offset()));
__ LoadSimd128(kScratchSimd128Reg, MemOperand(src.ra(), ip));
__ mov(ip, Operand(dst.offset()));
__ StoreSimd128(kScratchSimd128Reg, MemOperand(dst.ra(), ip));
__ LoadSimd128(kScratchSimd128Reg, src, kScratchReg);
__ StoreSimd128(kScratchSimd128Reg, dst, kScratchReg);
}
}
} else {