PPC: [wasm-simd] Allow simd load/store on unaligned addresses
lvx and stvx require 16-byte aligned addresses. This CL enables loading from
and storing to addresses which are not 16-byte aligned.

Change-Id: I5635e857a979520822c8b30bb5477a159e97e6e5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2327648
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#69135}
Parent: 80396fd3e5
Commit: 226a48196f
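For background, lvx and stvx compute an effective address and then drop its low four bits, so pointing them at an unaligned address would silently access the wrong 16 bytes rather than fault. A minimal illustration of that masking (standalone sketch, not V8 code; the function name is made up):

    #include <cstdint>

    // Illustrative model of the address lvx/stvx actually access: the low
    // four bits of the computed effective address are ignored.
    uintptr_t LvxEffectiveAddress(uintptr_t ra_plus_rb) {
      return ra_plus_rb & ~uintptr_t{15};
    }

This is why the CL routes unaligned SIMD accesses through lxvd/stxvd instead and only uses lvx/stvx on a known-aligned stack slot.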
@@ -1777,6 +1777,18 @@ void Assembler::mtvsrd(const Simd128Register rt, const Register ra) {
   emit(MTVSRD | rt.code() * B21 | ra.code() * B16 | TX);
 }
 
+void Assembler::lxvd(const Simd128Register rt, const MemOperand& src) {
+  int TX = 1;
+  emit(LXVD | rt.code() * B21 | src.ra().code() * B16 | src.rb().code() * B11 |
+       TX);
+}
+
+void Assembler::stxvd(const Simd128Register rt, const MemOperand& dst) {
+  int SX = 1;
+  emit(STXVD | rt.code() * B21 | dst.ra().code() * B16 | dst.rb().code() * B11 |
+       SX);
+}
+
 // Pseudo instructions.
 void Assembler::nop(int type) {
   Register reg = r0;
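As a side note, the two new emitters use the same X-form field packing as the surrounding assembler code. The standalone sketch below restates that layout outside the Assembler class (the constants and helper name are invented for illustration; the bit positions are read off the B11/B16/B21 multipliers in the emit() calls above):

    #include <cstdint>

    constexpr uint32_t B11 = 1u << 11;
    constexpr uint32_t B16 = 1u << 16;
    constexpr uint32_t B21 = 1u << 21;

    // Pack an X-form VSX instruction: RT/RS shifted to bit 21, RA to bit 16,
    // RB to bit 11 (counting from the least-significant bit), with the low
    // TX/SX bit selecting the upper half of the VSX register file.
    constexpr uint32_t EncodeVsxXForm(uint32_t opcode, uint32_t rt, uint32_t ra,
                                      uint32_t rb, uint32_t tx_or_sx) {
      return opcode | rt * B21 | ra * B16 | rb * B11 | tx_or_sx;
    }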
@@ -1019,6 +1019,8 @@ class Assembler : public AssemblerBase {
   void mfvsrd(const Register ra, const Simd128Register r);
   void mfvsrwz(const Register ra, const Simd128Register r);
   void mtvsrd(const Simd128Register rt, const Register ra);
+  void lxvd(const Simd128Register rt, const MemOperand& src);
+  void stxvd(const Simd128Register rt, const MemOperand& src);
 
   // Pseudo instructions
 
@@ -2044,7 +2044,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       bool is_atomic = i.InputInt32(2);
       // lvx only supports MRR.
       DCHECK_EQ(mode, kMode_MRR);
-      __ lvx(result, operand);
+      // lvx needs the stack to be 16 byte aligned.
+      // We first use lxvd/stxvd to copy the content on an aligned address. lxvd
+      // itself reverses the lanes so it cannot be used as is.
+      __ lxvd(kScratchDoubleReg, operand);
+      __ mr(kScratchReg, sp);
+      __ ClearRightImm(
+          sp, sp,
+          Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
+      __ addi(sp, sp, Operand(-16));
+      __ li(r0, Operand(0));
+      __ stxvd(kScratchDoubleReg, MemOperand(sp, r0));
+      // Load it with correct lane ordering.
+      __ lvx(result, MemOperand(sp, r0));
+      __ mr(sp, kScratchReg);
       if (is_atomic) __ lwsync();
       DCHECK_EQ(LeaveRC, i.OutputRCBit());
       break;
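The ClearRightImm/addi pair above carves out a 16-byte aligned scratch slot below the current stack pointer before the value is reloaded with lvx. A standalone sketch of just that address arithmetic (illustrative only; the helper name is made up):

    #include <cstdint>

    // Align sp down to a 16-byte boundary (the "&= -16" the comment mentions),
    // then step back another 16 bytes so the scratch slot cannot overlap data
    // still live at or above the original sp.
    uintptr_t AlignedScratchSlot(uintptr_t sp) {
      uintptr_t aligned = sp & ~uintptr_t{15};
      return aligned - 16;
    }

The original sp is kept in kScratchReg and restored with mr(sp, kScratchReg) once the aligned slot is no longer needed.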
@@ -2078,7 +2091,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       if (is_atomic) __ lwsync();
       // stvx only supports MRR.
       DCHECK_EQ(mode, kMode_MRR);
-      __ stvx(value, operand);
+      // stvx needs the stack to be 16 byte aligned.
+      // We use lxvd/stxvd to store the content on an aligned address. stxvd
+      // itself reverses the lanes so it cannot be used as is.
+      __ mr(kScratchReg, sp);
+      __ ClearRightImm(
+          sp, sp,
+          Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
+      __ addi(sp, sp, Operand(-16));
+      __ li(r0, Operand(0));
+      __ stvx(value, MemOperand(sp, r0));
+      __ lxvd(kScratchDoubleReg, MemOperand(sp, r0));
+      __ stxvd(kScratchDoubleReg, operand);
+      __ mr(sp, kScratchReg);
       if (is_atomic) __ sync();
       DCHECK_EQ(LeaveRC, i.OutputRCBit());
       break;
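On the store side the value is first parked in the aligned slot with stvx, which keeps lane order, and then moved to the unaligned destination with an lxvd/stxvd pair. Per the comments above, each of those instructions reverses the doubleword lanes, so applying them back to back leaves the destination bytes in the original order. A toy model of that cancellation (not the actual ISA semantics, just the swap the comments describe; names are invented):

    #include <cstdint>
    #include <cstring>

    struct Vec128 { uint64_t dw[2]; };

    // Toy lxvd: read 16 bytes from an arbitrary address with the two 8-byte
    // doublewords swapped, mirroring the lane reversal the CL's comments mention.
    Vec128 ToyLxvd(const uint8_t* src) {
      Vec128 v;
      std::memcpy(&v.dw[1], src, 8);
      std::memcpy(&v.dw[0], src + 8, 8);
      return v;
    }

    // Toy stxvd: write 16 bytes with the same doubleword swap, so the swap
    // from ToyLxvd is undone at the destination.
    void ToyStxvd(uint8_t* dst, const Vec128& v) {
      std::memcpy(dst, &v.dw[1], 8);
      std::memcpy(dst + 8, &v.dw[0], 8);
    }

Copying 16 bytes with ToyLxvd followed by ToyStxvd reproduces the source bytes exactly, which is why no extra permute is needed between the aligned slot and the unaligned destination.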
@@ -634,6 +634,22 @@ void Decoder::DecodeExt1(Instruction* instr) {
 void Decoder::DecodeExt2(Instruction* instr) {
   // Some encodings are 10-1 bits, handle those first
   switch (EXT2 | (instr->BitField(10, 1))) {
+    case LVX: {
+      Format(instr, "lvx 'Dt, 'ra, 'rb");
+      return;
+    }
+    case STVX: {
+      Format(instr, "stvx 'Dt, 'ra, 'rb");
+      return;
+    }
+    case LXVD: {
+      Format(instr, "lxvd 'Dt, 'ra, 'rb");
+      return;
+    }
+    case STXVD: {
+      Format(instr, "stxvd 'Dt, 'ra, 'rb");
+      return;
+    }
     case SRWX: {
       Format(instr, "srw'. 'ra, 'rs, 'rb");
       return;
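These four new cases are handled in the decoder's first switch, which keys on the extended opcode held in bits 10 through 1 of the instruction word, OR'ed with the EXT2 primary opcode. A small sketch of that in-place bit-field masking (a from-scratch helper for illustration, not V8's Instruction::BitField):

    #include <cstdint>

    // Extract bits hi..lo of a 32-bit instruction word, left in place
    // (unshifted), so the result can be OR'ed with the primary opcode and
    // compared against constants such as LVX, STVX, LXVD and STXVD.
    constexpr uint32_t BitFieldInPlace(uint32_t instr, int hi, int lo) {
      uint32_t mask = ((1u << (hi - lo + 1)) - 1) << lo;
      return instr & mask;
    }

The two hunks below drop the old STVX and LVX cases from the later part of the same switch, since they are now matched by the 10-1 bit dispatch above.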
@@ -942,10 +958,6 @@ void Decoder::DecodeExt2(Instruction* instr) {
       Format(instr, "sthux 'rs, 'ra, 'rb");
       return;
     }
-    case STVX: {
-      Format(instr, "stvx 'Dt, 'ra, 'rb");
-      return;
-    }
     case LWZX: {
       Format(instr, "lwzx 'rt, 'ra, 'rb");
       return;
@@ -990,10 +1002,6 @@ void Decoder::DecodeExt2(Instruction* instr) {
       Format(instr, "lwarx 'rt, 'ra, 'rb");
       return;
     }
-    case LVX: {
-      Format(instr, "lvx 'Dt, 'ra, 'rb");
-      return;
-    }
 #if V8_TARGET_ARCH_PPC64
     case LDX: {
       Format(instr, "ldx 'rt, 'ra, 'rb");