PPC: [wasm-simd] Allow simd load/store on unaligned addresses
lvx and stvx require 16-byte aligned addresses. This CL enables loading from
and storing to addresses which are not 16-byte aligned.

Change-Id: I5635e857a979520822c8b30bb5477a159e97e6e5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2327648
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#69135}
Parent: 80396fd3e5
Commit: 226a48196f
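For background, lvx and stvx compute an effective address and then drop its low four bits, so pointing them at an unaligned address would silently access the wrong 16 bytes rather than fault. A minimal illustration of that masking (standalone sketch, not V8 code; the function name is made up):

    #include <cstdint>

    // Illustrative model of the address lvx/stvx actually access: the low
    // four bits of the computed effective address are ignored.
    uintptr_t LvxEffectiveAddress(uintptr_t ra_plus_rb) {
      return ra_plus_rb & ~uintptr_t{15};
    }

This is why the CL routes unaligned SIMD accesses through lxvd/stxvd instead and only uses lvx/stvx on a known-aligned stack slot.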
@@ -1777,6 +1777,18 @@ void Assembler::mtvsrd(const Simd128Register rt, const Register ra) {
   emit(MTVSRD | rt.code() * B21 | ra.code() * B16 | TX);
 }
 
+void Assembler::lxvd(const Simd128Register rt, const MemOperand& src) {
+  int TX = 1;
+  emit(LXVD | rt.code() * B21 | src.ra().code() * B16 | src.rb().code() * B11 |
+       TX);
+}
+
+void Assembler::stxvd(const Simd128Register rt, const MemOperand& dst) {
+  int SX = 1;
+  emit(STXVD | rt.code() * B21 | dst.ra().code() * B16 | dst.rb().code() * B11 |
+       SX);
+}
+
 // Pseudo instructions.
 void Assembler::nop(int type) {
   Register reg = r0;
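As a side note, the two new emitters use the same X-form field packing as the surrounding assembler code. The standalone sketch below restates that layout outside the Assembler class (the constants and helper name are invented for illustration; the bit positions are read off the B11/B16/B21 multipliers in the emit() calls above):

    #include <cstdint>

    constexpr uint32_t B11 = 1u << 11;
    constexpr uint32_t B16 = 1u << 16;
    constexpr uint32_t B21 = 1u << 21;

    // Pack an X-form VSX instruction: RT/RS shifted to bit 21, RA to bit 16,
    // RB to bit 11 (counting from the least-significant bit), with the low
    // TX/SX bit selecting the upper half of the VSX register file.
    constexpr uint32_t EncodeVsxXForm(uint32_t opcode, uint32_t rt, uint32_t ra,
                                      uint32_t rb, uint32_t tx_or_sx) {
      return opcode | rt * B21 | ra * B16 | rb * B11 | tx_or_sx;
    }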
@@ -1019,6 +1019,8 @@ class Assembler : public AssemblerBase {
   void mfvsrd(const Register ra, const Simd128Register r);
   void mfvsrwz(const Register ra, const Simd128Register r);
   void mtvsrd(const Simd128Register rt, const Register ra);
+  void lxvd(const Simd128Register rt, const MemOperand& src);
+  void stxvd(const Simd128Register rt, const MemOperand& src);
 
   // Pseudo instructions
 
@@ -2044,7 +2044,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       bool is_atomic = i.InputInt32(2);
       // lvx only supports MRR.
       DCHECK_EQ(mode, kMode_MRR);
-      __ lvx(result, operand);
+      // lvx needs the stack to be 16 byte aligned.
+      // We first use lxvd/stxvd to copy the content on an aligned address. lxvd
+      // itself reverses the lanes so it cannot be used as is.
+      __ lxvd(kScratchDoubleReg, operand);
+      __ mr(kScratchReg, sp);
+      __ ClearRightImm(
+          sp, sp,
+          Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
+      __ addi(sp, sp, Operand(-16));
+      __ li(r0, Operand(0));
+      __ stxvd(kScratchDoubleReg, MemOperand(sp, r0));
+      // Load it with correct lane ordering.
+      __ lvx(result, MemOperand(sp, r0));
+      __ mr(sp, kScratchReg);
       if (is_atomic) __ lwsync();
       DCHECK_EQ(LeaveRC, i.OutputRCBit());
       break;
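The ClearRightImm/addi pair above carves out a 16-byte aligned scratch slot below the current stack pointer before the value is reloaded with lvx. A standalone sketch of just that address arithmetic (illustrative only; the helper name is made up):

    #include <cstdint>

    // Align sp down to a 16-byte boundary (the "&= -16" the comment mentions),
    // then step back another 16 bytes so the scratch slot cannot overlap data
    // still live at or above the original sp.
    uintptr_t AlignedScratchSlot(uintptr_t sp) {
      uintptr_t aligned = sp & ~uintptr_t{15};
      return aligned - 16;
    }

The original sp is kept in kScratchReg and restored with mr(sp, kScratchReg) once the aligned slot is no longer needed.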
@@ -2078,7 +2091,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       if (is_atomic) __ lwsync();
       // stvx only supports MRR.
       DCHECK_EQ(mode, kMode_MRR);
-      __ stvx(value, operand);
+      // stvx needs the stack to be 16 byte aligned.
+      // We use lxvd/stxvd to store the content on an aligned address. stxvd
+      // itself reverses the lanes so it cannot be used as is.
+      __ mr(kScratchReg, sp);
+      __ ClearRightImm(
+          sp, sp,
+          Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
+      __ addi(sp, sp, Operand(-16));
+      __ li(r0, Operand(0));
+      __ stvx(value, MemOperand(sp, r0));
+      __ lxvd(kScratchDoubleReg, MemOperand(sp, r0));
+      __ stxvd(kScratchDoubleReg, operand);
+      __ mr(sp, kScratchReg);
       if (is_atomic) __ sync();
       DCHECK_EQ(LeaveRC, i.OutputRCBit());
       break;
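On the store side the value is first parked in the aligned slot with stvx, which keeps lane order, and then moved to the unaligned destination with an lxvd/stxvd pair. Per the comments above, each of those instructions reverses the doubleword lanes, so applying them back to back leaves the destination bytes in the original order. A toy model of that cancellation (not the actual ISA semantics, just the swap the comments describe; names are invented):

    #include <cstdint>
    #include <cstring>

    struct Vec128 { uint64_t dw[2]; };

    // Toy lxvd: read 16 bytes from an arbitrary address with the two 8-byte
    // doublewords swapped, mirroring the lane reversal the CL's comments mention.
    Vec128 ToyLxvd(const uint8_t* src) {
      Vec128 v;
      std::memcpy(&v.dw[1], src, 8);
      std::memcpy(&v.dw[0], src + 8, 8);
      return v;
    }

    // Toy stxvd: write 16 bytes with the same doubleword swap, so the swap
    // from ToyLxvd is undone at the destination.
    void ToyStxvd(uint8_t* dst, const Vec128& v) {
      std::memcpy(dst, &v.dw[1], 8);
      std::memcpy(dst + 8, &v.dw[0], 8);
    }

Copying 16 bytes with ToyLxvd followed by ToyStxvd reproduces the source bytes exactly, which is why no extra permute is needed between the aligned slot and the unaligned destination.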
@@ -634,6 +634,22 @@ void Decoder::DecodeExt1(Instruction* instr) {
 void Decoder::DecodeExt2(Instruction* instr) {
   // Some encodings are 10-1 bits, handle those first
   switch (EXT2 | (instr->BitField(10, 1))) {
+    case LVX: {
+      Format(instr, "lvx 'Dt, 'ra, 'rb");
+      return;
+    }
+    case STVX: {
+      Format(instr, "stvx 'Dt, 'ra, 'rb");
+      return;
+    }
+    case LXVD: {
+      Format(instr, "lxvd 'Dt, 'ra, 'rb");
+      return;
+    }
+    case STXVD: {
+      Format(instr, "stxvd 'Dt, 'ra, 'rb");
+      return;
+    }
     case SRWX: {
       Format(instr, "srw'. 'ra, 'rs, 'rb");
       return;
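These four new cases are handled in the decoder's first switch, which keys on the extended opcode held in bits 10 through 1 of the instruction word, OR'ed with the EXT2 primary opcode. A small sketch of that in-place bit-field masking (a from-scratch helper for illustration, not V8's Instruction::BitField):

    #include <cstdint>

    // Extract bits hi..lo of a 32-bit instruction word, left in place
    // (unshifted), so the result can be OR'ed with the primary opcode and
    // compared against constants such as LVX, STVX, LXVD and STXVD.
    constexpr uint32_t BitFieldInPlace(uint32_t instr, int hi, int lo) {
      uint32_t mask = ((1u << (hi - lo + 1)) - 1) << lo;
      return instr & mask;
    }

The two hunks below drop the old STVX and LVX cases from the later part of the same switch, since they are now matched by the 10-1 bit dispatch above.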
@@ -942,10 +958,6 @@ void Decoder::DecodeExt2(Instruction* instr) {
       Format(instr, "sthux 'rs, 'ra, 'rb");
       return;
     }
-    case STVX: {
-      Format(instr, "stvx 'Dt, 'ra, 'rb");
-      return;
-    }
     case LWZX: {
       Format(instr, "lwzx 'rt, 'ra, 'rb");
       return;
@@ -990,10 +1002,6 @@ void Decoder::DecodeExt2(Instruction* instr) {
       Format(instr, "lwarx 'rt, 'ra, 'rb");
       return;
     }
-    case LVX: {
-      Format(instr, "lvx 'Dt, 'ra, 'rb");
-      return;
-    }
 #if V8_TARGET_ARCH_PPC64
     case LDX: {
       Format(instr, "ldx 'rt, 'ra, 'rb");