From 21e5dbdab2460ef5672f378a7a6758d2e4ba8c0a Mon Sep 17 00:00:00 2001 From: Milad Fa Date: Fri, 15 Jul 2022 15:02:38 -0400 Subject: [PATCH] PPC [simd]: refactor simd load/store in macro-asm This CL refactors simd load/store to accept a scratch register which will be used in macro-asm. LE-enforced versions of them are also introduced. Change-Id: I97f4f4870d7889204b1d42cf50de85e234ecae36 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3765514 Reviewed-by: Junliang Yan Commit-Queue: Milad Farazmand Cr-Commit-Position: refs/heads/main@{#81757} --- src/codegen/ppc/macro-assembler-ppc.cc | 106 ++++++++++-------- src/codegen/ppc/macro-assembler-ppc.h | 13 ++- .../backend/ppc/code-generator-ppc.cc | 29 +++-- 3 files changed, 84 insertions(+), 64 deletions(-) diff --git a/src/codegen/ppc/macro-assembler-ppc.cc b/src/codegen/ppc/macro-assembler-ppc.cc index 696ea5d1d4..13a6e9fed2 100644 --- a/src/codegen/ppc/macro-assembler-ppc.cc +++ b/src/codegen/ppc/macro-assembler-ppc.cc @@ -450,8 +450,7 @@ void TurboAssembler::MultiPushV128(Simd128RegList simd_regs, if ((simd_regs.bits() & (1 << i)) != 0) { Simd128Register simd_reg = Simd128Register::from_code(i); stack_offset -= kSimd128Size; - li(ip, Operand(stack_offset)); - StoreSimd128(simd_reg, MemOperand(location, ip)); + StoreSimd128(simd_reg, MemOperand(location, stack_offset), ip); } } } @@ -475,8 +474,7 @@ void TurboAssembler::MultiPopV128(Simd128RegList simd_regs, Register location) { for (int16_t i = 0; i < Simd128Register::kNumRegisters; i++) { if ((simd_regs.bits() & (1 << i)) != 0) { Simd128Register simd_reg = Simd128Register::from_code(i); - li(ip, Operand(stack_offset)); - LoadSimd128(simd_reg, MemOperand(location, ip)); + LoadSimd128(simd_reg, MemOperand(location, stack_offset), ip); stack_offset += kSimd128Size; } } @@ -3195,6 +3193,26 @@ void MacroAssembler::AndSmiLiteral(Register dst, Register src, Smi smi, } \ } +#define GenerateMemoryOperationRR(reg, mem, op) \ + { \ + if (mem.offset() == 0) { \ + 
if (mem.rb() != no_reg) \ + op(reg, mem); \ + else \ + op(reg, MemOperand(r0, mem.ra())); \ + } else if (is_int16(mem.offset())) { \ + if (mem.rb() != no_reg) \ + addi(scratch, mem.rb(), Operand(mem.offset())); \ + else \ + mov(scratch, Operand(mem.offset())); \ + op(reg, MemOperand(mem.ra(), scratch)); \ + } else { \ + mov(scratch, Operand(mem.offset())); \ + if (mem.rb() != no_reg) add(scratch, scratch, mem.rb()); \ + op(reg, MemOperand(mem.ra(), scratch)); \ + } \ + } + #define GenerateMemoryOperationPrefixed(reg, mem, ri_op, rip_op, rr_op) \ { \ int64_t offset = mem.offset(); \ @@ -3359,36 +3377,6 @@ void TurboAssembler::LoadS8(Register dst, const MemOperand& mem, extsb(dst, dst); } -void TurboAssembler::LoadSimd128(Simd128Register src, const MemOperand& mem) { - DCHECK(mem.rb().is_valid()); - lxvx(src, mem); -} - -void TurboAssembler::StoreSimd128(Simd128Register src, const MemOperand& mem) { - DCHECK(mem.rb().is_valid()); - stxvx(src, mem); -} - -#define GenerateMemoryLEOperation(reg, mem, op) \ - { \ - if (mem.offset() == 0) { \ - if (mem.rb() != no_reg) \ - op(reg, mem); \ - else \ - op(reg, MemOperand(r0, mem.ra())); \ - } else if (is_int16(mem.offset())) { \ - if (mem.rb() != no_reg) \ - addi(scratch, mem.rb(), Operand(mem.offset())); \ - else \ - mov(scratch, Operand(mem.offset())); \ - op(reg, MemOperand(mem.ra(), scratch)); \ - } else { \ - mov(scratch, Operand(mem.offset())); \ - if (mem.rb() != no_reg) add(scratch, scratch, mem.rb()); \ - op(reg, MemOperand(mem.ra(), scratch)); \ - } \ - } - #define MEM_LE_OP_LIST(V) \ V(LoadU64, ldbrx) \ V(LoadU32, lwbrx) \ @@ -3401,7 +3389,7 @@ void TurboAssembler::StoreSimd128(Simd128Register src, const MemOperand& mem) { #define MEM_LE_OP_FUNCTION(name, op) \ void TurboAssembler::name##LE(Register reg, const MemOperand& mem, \ Register scratch) { \ - GenerateMemoryLEOperation(reg, mem, op); \ + GenerateMemoryOperationRR(reg, mem, op); \ } #else #define MEM_LE_OP_FUNCTION(name, op) \ @@ -3481,6 +3469,38 @@ void 
TurboAssembler::StoreF32LE(DoubleRegister dst, const MemOperand& mem, #endif } +// Simd Support. +void TurboAssembler::LoadSimd128(Simd128Register dst, const MemOperand& mem, + Register scratch) { + GenerateMemoryOperationRR(dst, mem, lxvx); +} + +void TurboAssembler::StoreSimd128(Simd128Register src, const MemOperand& mem, + Register scratch) { + GenerateMemoryOperationRR(src, mem, stxvx); +} + +void TurboAssembler::LoadSimd128LE(Simd128Register dst, const MemOperand& mem, + Register scratch) { +#ifdef V8_TARGET_BIG_ENDIAN + LoadSimd128(dst, mem, scratch); + xxbrq(dst, dst); +#else + LoadSimd128(dst, mem, scratch); +#endif +} + +void TurboAssembler::StoreSimd128LE(Simd128Register src, const MemOperand& mem, + Register scratch1, + Simd128Register scratch2) { +#ifdef V8_TARGET_BIG_ENDIAN + xxbrq(scratch2, src); + StoreSimd128(scratch2, mem, scratch1); +#else + StoreSimd128(src, mem, scratch1); +#endif +} + Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3, Register reg4, Register reg5, Register reg6) { @@ -3612,23 +3632,19 @@ void TurboAssembler::SwapSimd128(Simd128Register src, Simd128Register dst, void TurboAssembler::SwapSimd128(Simd128Register src, MemOperand dst, Simd128Register scratch) { DCHECK(src != scratch); - mov(ip, Operand(dst.offset())); - LoadSimd128(scratch, MemOperand(dst.ra(), ip)); - StoreSimd128(src, MemOperand(dst.ra(), ip)); + LoadSimd128(scratch, dst, ip); + StoreSimd128(src, dst, ip); vor(src, scratch, scratch); } void TurboAssembler::SwapSimd128(MemOperand src, MemOperand dst, Simd128Register scratch1, Simd128Register scratch2) { - mov(ip, Operand(src.offset())); - LoadSimd128(scratch1, MemOperand(src.ra(), ip)); - mov(ip, Operand(dst.offset())); - LoadSimd128(scratch2, MemOperand(dst.ra(), ip)); + LoadSimd128(scratch1, src, ip); + LoadSimd128(scratch2, dst, ip); - StoreSimd128(scratch1, MemOperand(dst.ra(), ip)); - mov(ip, Operand(src.offset())); - StoreSimd128(scratch2, MemOperand(src.ra(), ip)); + 
StoreSimd128(scratch1, dst, ip); + StoreSimd128(scratch2, src, ip); } void TurboAssembler::ByteReverseU16(Register dst, Register val, diff --git a/src/codegen/ppc/macro-assembler-ppc.h b/src/codegen/ppc/macro-assembler-ppc.h index 497b886cf9..b2b8b0e535 100644 --- a/src/codegen/ppc/macro-assembler-ppc.h +++ b/src/codegen/ppc/macro-assembler-ppc.h @@ -143,7 +143,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { void LoadDoubleLiteral(DoubleRegister result, base::Double value, Register scratch); - void LoadSimd128(Simd128Register dst, const MemOperand& mem); // load a literal signed int value to GPR void LoadIntLiteral(Register dst, int value); @@ -1025,8 +1024,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { void StoreF64WithUpdate(DoubleRegister src, const MemOperand& mem, Register scratch = no_reg); - void StoreSimd128(Simd128Register src, const MemOperand& mem); - void LoadU64(Register dst, const MemOperand& mem, Register scratch = no_reg); void LoadU32(Register dst, const MemOperand& mem, Register scratch = no_reg); void LoadS32(Register dst, const MemOperand& mem, Register scratch = no_reg); @@ -1065,6 +1062,16 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { void StoreF64LE(DoubleRegister src, const MemOperand& mem, Register scratch, Register scratch2); + // Simd Support. 
+ void LoadSimd128(Simd128Register dst, const MemOperand& mem, + Register scratch); + void StoreSimd128(Simd128Register src, const MemOperand& mem, + Register scratch); + void LoadSimd128LE(Simd128Register dst, const MemOperand& mem, + Register scratch); + void StoreSimd128LE(Simd128Register src, const MemOperand& mem, + Register scratch1, Simd128Register scratch2); + private: static const int kSmiShift = kSmiTagSize + kSmiShiftSize; diff --git a/src/compiler/backend/ppc/code-generator-ppc.cc b/src/compiler/backend/ppc/code-generator-ppc.cc index 72628593e4..2bfac34fcd 100644 --- a/src/compiler/backend/ppc/code-generator-ppc.cc +++ b/src/compiler/backend/ppc/code-generator-ppc.cc @@ -1184,8 +1184,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ LoadF32(i.OutputFloatRegister(), MemOperand(fp, offset), r0); } else { DCHECK_EQ(MachineRepresentation::kSimd128, op->representation()); - __ mov(ip, Operand(offset)); - __ LoadSimd128(i.OutputSimd128Register(), MemOperand(fp, ip)); + __ LoadSimd128(i.OutputSimd128Register(), MemOperand(fp, offset), + kScratchReg); } } else { __ LoadU64(i.OutputRegister(), MemOperand(fp, offset), r0); @@ -1701,7 +1701,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; case MachineRepresentation::kSimd128: __ addi(sp, sp, Operand(-kSimd128Size)); - __ StoreSimd128(i.InputSimd128Register(1), MemOperand(r0, sp)); + __ StoreSimd128(i.InputSimd128Register(1), MemOperand(r0, sp), + kScratchReg); break; default: __ StoreU64WithUpdate(i.InputRegister(1), @@ -1745,8 +1746,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( MemOperand(sp, slot * kSystemPointerSize), r0); } else { DCHECK_EQ(MachineRepresentation::kSimd128, op->representation()); - __ mov(ip, Operand(slot * kSystemPointerSize)); - __ StoreSimd128(i.InputSimd128Register(0), MemOperand(ip, sp)); + __ StoreSimd128(i.InputSimd128Register(0), + MemOperand(sp, slot * kSystemPointerSize), + kScratchReg); } } else { 
__ StoreU64(i.InputRegister(0), @@ -2007,7 +2009,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( MemOperand operand = i.MemoryOperand(&mode); bool is_atomic = i.InputInt32(2); DCHECK_EQ(mode, kMode_MRR); - __ LoadSimd128(result, operand); + __ LoadSimd128(result, operand, kScratchReg); if (is_atomic) __ lwsync(); DCHECK_EQ(LeaveRC, i.OutputRCBit()); break; @@ -2044,7 +2046,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( bool is_atomic = i.InputInt32(3); if (is_atomic) __ lwsync(); DCHECK_EQ(mode, kMode_MRR); - __ StoreSimd128(value, operand); + __ StoreSimd128(value, operand, kScratchReg); if (is_atomic) __ sync(); DCHECK_EQ(LeaveRC, i.OutputRCBit()); break; @@ -4486,8 +4488,7 @@ void CodeGenerator::AssembleMove(InstructionOperand* source, } else { DCHECK(destination->IsSimd128StackSlot()); MemOperand dst = g.ToMemOperand(destination); - __ mov(ip, Operand(dst.offset())); - __ StoreSimd128(g.ToSimd128Register(source), MemOperand(dst.ra(), ip)); + __ StoreSimd128(g.ToSimd128Register(source), dst, kScratchReg); } } else { DoubleRegister src = g.ToDoubleRegister(source); @@ -4516,9 +4517,7 @@ void CodeGenerator::AssembleMove(InstructionOperand* source, } else { DCHECK_EQ(MachineRepresentation::kSimd128, op->representation()); MemOperand src = g.ToMemOperand(source); - __ mov(ip, Operand(src.offset())); - __ LoadSimd128(g.ToSimd128Register(destination), - MemOperand(src.ra(), ip)); + __ LoadSimd128(g.ToSimd128Register(destination), src, kScratchReg); } } else { LocationOperand* op = LocationOperand::cast(source); @@ -4533,10 +4532,8 @@ void CodeGenerator::AssembleMove(InstructionOperand* source, DCHECK_EQ(MachineRepresentation::kSimd128, op->representation()); MemOperand src = g.ToMemOperand(source); MemOperand dst = g.ToMemOperand(destination); - __ mov(ip, Operand(src.offset())); - __ LoadSimd128(kScratchSimd128Reg, MemOperand(src.ra(), ip)); - __ mov(ip, Operand(dst.offset())); - __ StoreSimd128(kScratchSimd128Reg, 
MemOperand(dst.ra(), ip)); + __ LoadSimd128(kScratchSimd128Reg, src, kScratchReg); + __ StoreSimd128(kScratchSimd128Reg, dst, kScratchReg); } } } else {