From 8201f027162377bb788302170eb9297d88d9ef04 Mon Sep 17 00:00:00 2001
From: Milad Fa
Date: Wed, 25 Nov 2020 11:16:10 -0500
Subject: [PATCH] PPC: [wasm-simd] Implement Load Transform on PPC LE

Change-Id: I3bb6a6822dea5ce6aa3e12f3137861a2f93bbb68
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2560604
Reviewed-by: Junliang Yan
Commit-Queue: Milad Fa
Cr-Commit-Position: refs/heads/master@{#71416}
---
 src/codegen/ppc/constants-ppc.h               |   4 +-
 .../backend/ppc/code-generator-ppc.cc         | 101 ++++++++++++++++++
 .../backend/ppc/instruction-codes-ppc.h       |  12 +++
 .../backend/ppc/instruction-scheduler-ppc.cc  |  12 +++
 .../backend/ppc/instruction-selector-ppc.cc   |  51 ++++++++-
 5 files changed, 177 insertions(+), 3 deletions(-)

diff --git a/src/codegen/ppc/constants-ppc.h b/src/codegen/ppc/constants-ppc.h
index f71d1beae3..36bc459f58 100644
--- a/src/codegen/ppc/constants-ppc.h
+++ b/src/codegen/ppc/constants-ppc.h
@@ -2373,6 +2373,8 @@ using Instr = uint32_t;
   V(vbpermq, VBPERMQ, 0x1000054C)
 
 #define PPC_VX_OPCODE_C_FORM_LIST(V)       \
+  /* Vector Unpack High Signed Word */     \
+  V(vupkhsw, VUPKHSW, 0x1000064E)          \
   /* Vector Unpack Low Signed Halfword */  \
   V(vupklsh, VUPKLSH, 0x100002CE)          \
   /* Vector Unpack High Signed Halfword */ \
@@ -2547,8 +2549,6 @@ using Instr = uint32_t;
   V(vsumsws, VSUMSWS, 0x10000788)      \
   /* Vector Unpack High Pixel */       \
   V(vupkhpx, VUPKHPX, 0x1000034E)      \
-  /* Vector Unpack High Signed Word */ \
-  V(vupkhsw, VUPKHSW, 0x1000064E)      \
   /* Vector Unpack Low Pixel */        \
   V(vupklpx, VUPKLPX, 0x100003CE)      \
   /* Vector Unpack Low Signed Word */  \
diff --git a/src/compiler/backend/ppc/code-generator-ppc.cc b/src/compiler/backend/ppc/code-generator-ppc.cc
index cb0a234a56..734ca30505 100644
--- a/src/compiler/backend/ppc/code-generator-ppc.cc
+++ b/src/compiler/backend/ppc/code-generator-ppc.cc
@@ -3464,6 +3464,107 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vsel(dst, src0, src1, kScratchDoubleReg);
       break;
     }
+#define ASSEMBLE_LOAD_TRANSFORM(scratch)       \
+  AddressingMode mode = kMode_None;            \
+  MemOperand operand = i.MemoryOperand(&mode); \
+  DCHECK_EQ(mode, kMode_MRR);                  \
+  __ lxvd(scratch, operand);  // Emits the load into |scratch|.
+    case kPPC_S128Load8Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
+      __ vspltb(dst, kScratchDoubleReg, Operand(7));
+      break;
+    }
+    case kPPC_S128Load16Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
+      __ vsplth(dst, kScratchDoubleReg, Operand(3));
+      break;
+    }
+    case kPPC_S128Load32Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
+      __ vspltw(dst, kScratchDoubleReg, Operand(1));
+      break;
+    }
+    case kPPC_S128Load64Splat: {
+      constexpr int lane_width_in_bytes = 8;
+      Simd128Register dst = i.OutputSimd128Register();
+      ASSEMBLE_LOAD_TRANSFORM(dst)  // Loads straight into dst, no scratch.
+      __ vinsertd(dst, dst, Operand(1 * lane_width_in_bytes));
+      break;
+    }
+    case kPPC_S128Load8x8S: {
+      Simd128Register dst = i.OutputSimd128Register();
+      ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
+      __ vupkhsb(dst, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_S128Load8x8U: {
+      Simd128Register dst = i.OutputSimd128Register();
+      ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
+      __ vupkhsb(dst, kScratchDoubleReg);
+      // Zero extend: build a 0xFF mask in scratch and AND it into dst.
+      __ li(ip, Operand(0xFF));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vsplth(kScratchDoubleReg, kScratchDoubleReg, Operand(3));
+      __ vand(dst, kScratchDoubleReg, dst);
+      break;
+    }
+    case kPPC_S128Load16x4S: {
+      Simd128Register dst = i.OutputSimd128Register();
+      ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
+      __ vupkhsh(dst, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_S128Load16x4U: {
+      Simd128Register dst = i.OutputSimd128Register();
+      ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
+      __ vupkhsh(dst, kScratchDoubleReg);
+      // Zero extend: build a 0xFFFF mask in scratch and AND it into dst.
+      __ mov(ip, Operand(0xFFFF));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vspltw(kScratchDoubleReg, kScratchDoubleReg, Operand(1));
+      __ vand(dst, kScratchDoubleReg, dst);
+
+      break;
+    }
+    case kPPC_S128Load32x2S: {
+      Simd128Register dst = i.OutputSimd128Register();
+      ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
+      __ vupkhsw(dst, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_S128Load32x2U: {
+      constexpr int lane_width_in_bytes = 8;
+      Simd128Register dst = i.OutputSimd128Register();
+      ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
+      __ vupkhsw(dst, kScratchDoubleReg);
+      // Zero extend: build a 0xFFFFFFFF mask in scratch and AND it into dst.
+      __ mov(ip, Operand(0xFFFFFFFF));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vinsertd(kScratchDoubleReg, kScratchDoubleReg,
+                  Operand(1 * lane_width_in_bytes));
+      __ vand(dst, kScratchDoubleReg, dst);
+      break;
+    }
+    case kPPC_S128Load32Zero: {
+      constexpr int lane_width_in_bytes = 4;
+      Simd128Register dst = i.OutputSimd128Register();
+      ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
+      __ vxor(dst, dst, dst);  // Zero dst, then insert the loaded lane.
+      __ vinsertw(dst, kScratchDoubleReg, Operand(3 * lane_width_in_bytes));
+      break;
+    }
+    case kPPC_S128Load64Zero: {
+      constexpr int lane_width_in_bytes = 8;
+      Simd128Register dst = i.OutputSimd128Register();
+      ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
+      __ vxor(dst, dst, dst);  // Zero dst, then insert the loaded lane.
+      __ vinsertd(dst, kScratchDoubleReg, Operand(1 * lane_width_in_bytes));
+      break;
+    }
+#undef ASSEMBLE_LOAD_TRANSFORM
     case kPPC_StoreCompressTagged: {
       ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedFieldX);
       break;
diff --git a/src/compiler/backend/ppc/instruction-codes-ppc.h b/src/compiler/backend/ppc/instruction-codes-ppc.h
index a4f765260e..206dad7d5b 100644
--- a/src/compiler/backend/ppc/instruction-codes-ppc.h
+++ b/src/compiler/backend/ppc/instruction-codes-ppc.h
@@ -379,6 +379,18 @@ namespace compiler {
   V(PPC_S128Not)                     \
   V(PPC_S128Select)                  \
   V(PPC_S128AndNot)                  \
+  V(PPC_S128Load8Splat)              \
+  V(PPC_S128Load16Splat)             \
+  V(PPC_S128Load32Splat)             \
+  V(PPC_S128Load64Splat)             \
+  V(PPC_S128Load8x8S)                \
+  V(PPC_S128Load8x8U)                \
+  V(PPC_S128Load16x4S)               \
+  V(PPC_S128Load16x4U)               \
+  V(PPC_S128Load32x2S)               \
+  V(PPC_S128Load32x2U)               \
+  V(PPC_S128Load32Zero)              \
+  V(PPC_S128Load64Zero)              \
   V(PPC_StoreCompressTagged)         \
   V(PPC_LoadDecompressTaggedSigned)  \
   V(PPC_LoadDecompressTaggedPointer) \
diff --git a/src/compiler/backend/ppc/instruction-scheduler-ppc.cc b/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
index 31bf1e4aad..2de4f6a4c5 100644
--- a/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
+++ b/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
@@ -322,6 +322,18 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kPPC_LoadDecompressTaggedSigned:
     case kPPC_LoadDecompressTaggedPointer:
     case kPPC_LoadDecompressAnyTagged:
+    case kPPC_S128Load8Splat:  // Load-transform ops count as loads.
+    case kPPC_S128Load16Splat:
+    case kPPC_S128Load32Splat:
+    case kPPC_S128Load64Splat:
+    case kPPC_S128Load8x8S:
+    case kPPC_S128Load8x8U:
+    case kPPC_S128Load16x4S:
+    case kPPC_S128Load16x4U:
+    case kPPC_S128Load32x2S:
+    case kPPC_S128Load32x2U:
+    case kPPC_S128Load32Zero:
+    case kPPC_S128Load64Zero:
       return kIsLoadOperation;
 
     case kPPC_StoreWord8:
diff --git a/src/compiler/backend/ppc/instruction-selector-ppc.cc b/src/compiler/backend/ppc/instruction-selector-ppc.cc
index f3552492b2..bd7d230a98 100644
--- a/src/compiler/backend/ppc/instruction-selector-ppc.cc
+++ b/src/compiler/backend/ppc/instruction-selector-ppc.cc
@@ -2483,7 +2483,56 @@ void InstructionSelector::EmitPrepareResults(
   }
 }
 
-void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitLoadTransform(Node* node) {
+  LoadTransformParameters params = LoadTransformParametersOf(node->op());
+  PPCOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+
+  ArchOpcode opcode;
+  switch (params.transformation) {  // One PPC opcode per transformation.
+    case LoadTransformation::kS128Load8Splat:
+      opcode = kPPC_S128Load8Splat;
+      break;
+    case LoadTransformation::kS128Load16Splat:
+      opcode = kPPC_S128Load16Splat;
+      break;
+    case LoadTransformation::kS128Load32Splat:
+      opcode = kPPC_S128Load32Splat;
+      break;
+    case LoadTransformation::kS128Load64Splat:
+      opcode = kPPC_S128Load64Splat;
+      break;
+    case LoadTransformation::kS128Load8x8S:
+      opcode = kPPC_S128Load8x8S;
+      break;
+    case LoadTransformation::kS128Load8x8U:
+      opcode = kPPC_S128Load8x8U;
+      break;
+    case LoadTransformation::kS128Load16x4S:
+      opcode = kPPC_S128Load16x4S;
+      break;
+    case LoadTransformation::kS128Load16x4U:
+      opcode = kPPC_S128Load16x4U;
+      break;
+    case LoadTransformation::kS128Load32x2S:
+      opcode = kPPC_S128Load32x2S;
+      break;
+    case LoadTransformation::kS128Load32x2U:
+      opcode = kPPC_S128Load32x2U;
+      break;
+    case LoadTransformation::kS128Load32Zero:
+      opcode = kPPC_S128Load32Zero;
+      break;
+    case LoadTransformation::kS128Load64Zero:
+      opcode = kPPC_S128Load64Zero;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  Emit(opcode | AddressingModeField::encode(kMode_MRR),  // base + index regs
+       g.DefineAsRegister(node), g.UseRegister(base), g.UseRegister(index));
+}
 
 // static
 MachineOperatorBuilder::Flags