PPC: [wasm-simd] Implement Load Transform on PPC LE

Change-Id: I3bb6a6822dea5ce6aa3e12f3137861a2f93bbb68
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2560604
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#71416}
This commit is contained in:
Milad Fa 2020-11-25 11:16:10 -05:00 committed by Commit Bot
parent ff4129195e
commit 8201f02716
5 changed files with 177 additions and 3 deletions

View File

@ -2373,6 +2373,8 @@ using Instr = uint32_t;
V(vbpermq, VBPERMQ, 0x1000054C)
#define PPC_VX_OPCODE_C_FORM_LIST(V) \
/* Vector Unpack High Signed Word */ \
V(vupkhsw, VUPKHSW, 0x1000064E) \
/* Vector Unpack Low Signed Halfword */ \
V(vupklsh, VUPKLSH, 0x100002CE) \
/* Vector Unpack High Signed Halfword */ \
@ -2547,8 +2549,6 @@ using Instr = uint32_t;
V(vsumsws, VSUMSWS, 0x10000788) \
/* Vector Unpack High Pixel */ \
V(vupkhpx, VUPKHPX, 0x1000034E) \
/* Vector Unpack High Signed Word */ \
V(vupkhsw, VUPKHSW, 0x1000064E) \
/* Vector Unpack Low Pixel */ \
V(vupklpx, VUPKLPX, 0x100003CE) \
/* Vector Unpack Low Signed Word */ \

View File

@ -3464,6 +3464,107 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vsel(dst, src0, src1, kScratchDoubleReg);
break;
}
// Shared prologue for the S128 load-transform cases that follow. Expands to
// statements that decode the instruction's memory operand through
// i.MemoryOperand(), DCHECK that the decoded addressing mode is kMode_MRR,
// and emit an lxvd from that operand into |scratch|.
// The expansion declares the locals `mode` and `operand` and already ends in
// a semicolon, so it can be used only once per scope and call sites do not
// append their own `;`.
#define ASSEMBLE_LOAD_TRANSFORM(scratch) \
AddressingMode mode = kMode_None; \
MemOperand operand = i.MemoryOperand(&mode); \
DCHECK_EQ(mode, kMode_MRR); \
__ lxvd(scratch, operand);
// Load + splat with 8-bit lanes: load from the memory operand into
// kScratchDoubleReg, then vspltb replicates byte element 7 of the scratch
// register into dst.
// NOTE(review): element 7 is presumably where the byte at the effective
// address ends up after lxvd on LE -- confirm against the Power ISA.
case kPPC_S128Load8Splat: {
Simd128Register dst = i.OutputSimd128Register();
ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
__ vspltb(dst, kScratchDoubleReg, Operand(7));
break;
}
// Load + splat with 16-bit lanes: load from the memory operand into
// kScratchDoubleReg, then vsplth replicates halfword element 3 of the scratch
// register into dst.
// NOTE(review): element 3 is presumably the halfword at the effective address
// after lxvd on LE -- confirm.
case kPPC_S128Load16Splat: {
Simd128Register dst = i.OutputSimd128Register();
ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
__ vsplth(dst, kScratchDoubleReg, Operand(3));
break;
}
// Load + splat with 32-bit lanes: load from the memory operand into
// kScratchDoubleReg, then vspltw replicates word element 1 of the scratch
// register into dst.
// NOTE(review): element 1 is presumably the word at the effective address
// after lxvd on LE -- confirm.
case kPPC_S128Load32Splat: {
Simd128Register dst = i.OutputSimd128Register();
ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
__ vspltw(dst, kScratchDoubleReg, Operand(1));
break;
}
// Load + splat with 64-bit lanes. Unlike the narrower splats, the load goes
// straight into dst (no scratch register); vinsertd then inserts a doubleword
// taken from dst back into dst at byte offset 8.
// NOTE(review): presumably this leaves the loaded 64-bit value in both
// doublewords of dst -- confirm against the vinsertd wrapper's semantics.
case kPPC_S128Load64Splat: {
constexpr int lane_width_in_bytes = 8;
Simd128Register dst = i.OutputSimd128Register();
ASSEMBLE_LOAD_TRANSFORM(dst)
__ vinsertd(dst, dst, Operand(1 * lane_width_in_bytes));
break;
}
// Load + signed extend, 8-bit -> 16-bit lanes: load from the memory operand
// into kScratchDoubleReg, then vupkhsb (unpack high signed byte) widens the
// bytes into dst.
case kPPC_S128Load8x8S: {
Simd128Register dst = i.OutputSimd128Register();
ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
__ vupkhsb(dst, kScratchDoubleReg);
break;
}
// Load + unsigned extend, 8-bit -> 16-bit lanes. Reuses the signed unpack
// (vupkhsb) and then masks every lane to drop the sign-extension bits.
// kScratchDoubleReg is recycled for the mask once its loaded contents have
// been unpacked into dst; ip is clobbered as a temporary.
case kPPC_S128Load8x8U: {
Simd128Register dst = i.OutputSimd128Register();
ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
__ vupkhsb(dst, kScratchDoubleReg);
// Zero extend.
// Build a per-halfword mask: 0xFF in ip -> vector register -> splat of
// halfword element 3 (presumably 0x00FF in every halfword -- confirm).
__ li(ip, Operand(0xFF));
__ mtvsrd(kScratchDoubleReg, ip);
__ vsplth(kScratchDoubleReg, kScratchDoubleReg, Operand(3));
// AND with the mask so only the low byte of each lane survives.
__ vand(dst, kScratchDoubleReg, dst);
break;
}
// Load + signed extend, 16-bit -> 32-bit lanes: load from the memory operand
// into kScratchDoubleReg, then vupkhsh (unpack high signed halfword) widens
// the halfwords into dst.
case kPPC_S128Load16x4S: {
Simd128Register dst = i.OutputSimd128Register();
ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
__ vupkhsh(dst, kScratchDoubleReg);
break;
}
// Load + unsigned extend, 16-bit -> 32-bit lanes. Reuses the signed unpack
// (vupkhsh) and then masks every lane to drop the sign-extension bits.
// kScratchDoubleReg is recycled for the mask once its loaded contents have
// been unpacked into dst; ip is clobbered as a temporary.
case kPPC_S128Load16x4U: {
Simd128Register dst = i.OutputSimd128Register();
ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
__ vupkhsh(dst, kScratchDoubleReg);
// Zero extend.
// Build a per-word mask: 0xFFFF in ip -> vector register -> splat of word
// element 1 (presumably 0x0000FFFF in every word -- confirm).
__ mov(ip, Operand(0xFFFF));
__ mtvsrd(kScratchDoubleReg, ip);
__ vspltw(kScratchDoubleReg, kScratchDoubleReg, Operand(1));
// AND with the mask so only the low halfword of each lane survives.
__ vand(dst, kScratchDoubleReg, dst);
break;
}
// Load + signed extend, 32-bit -> 64-bit lanes: load from the memory operand
// into kScratchDoubleReg, then vupkhsw (unpack high signed word) widens the
// words into dst.
case kPPC_S128Load32x2S: {
Simd128Register dst = i.OutputSimd128Register();
ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
__ vupkhsw(dst, kScratchDoubleReg);
break;
}
// Load + unsigned extend, 32-bit -> 64-bit lanes. Reuses the signed unpack
// (vupkhsw) and then masks every lane to drop the sign-extension bits.
// kScratchDoubleReg is recycled for the mask once its loaded contents have
// been unpacked into dst; ip is clobbered as a temporary.
case kPPC_S128Load32x2U: {
constexpr int lane_width_in_bytes = 8;
Simd128Register dst = i.OutputSimd128Register();
ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
__ vupkhsw(dst, kScratchDoubleReg);
// Zero extend.
// Build a per-doubleword mask: 0xFFFFFFFF in ip -> vector register, then
// vinsertd copies a doubleword of the scratch register to byte offset 8 of
// itself (presumably so both doublewords hold the mask -- confirm).
__ mov(ip, Operand(0xFFFFFFFF));
__ mtvsrd(kScratchDoubleReg, ip);
__ vinsertd(kScratchDoubleReg, kScratchDoubleReg,
Operand(1 * lane_width_in_bytes));
// AND with the mask so only the low word of each lane survives.
__ vand(dst, kScratchDoubleReg, dst);
break;
}
// Load 32 bits and zero the rest of the vector: load from the memory operand
// into kScratchDoubleReg, clear dst by XOR-ing it with itself, then vinsertw
// inserts a word from the scratch register into dst at byte offset 12
// (3 * lane_width_in_bytes).
// NOTE(review): byte offset 12 is presumably lane 0 in wasm (LE) lane order
// -- confirm.
case kPPC_S128Load32Zero: {
constexpr int lane_width_in_bytes = 4;
Simd128Register dst = i.OutputSimd128Register();
ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
__ vxor(dst, dst, dst);
__ vinsertw(dst, kScratchDoubleReg, Operand(3 * lane_width_in_bytes));
break;
}
// Load 64 bits and zero the rest of the vector: load from the memory operand
// into kScratchDoubleReg, clear dst by XOR-ing it with itself, then vinsertd
// inserts a doubleword from the scratch register into dst at byte offset 8
// (1 * lane_width_in_bytes).
// NOTE(review): byte offset 8 is presumably lane 0 in wasm (LE) lane order
// -- confirm.
case kPPC_S128Load64Zero: {
constexpr int lane_width_in_bytes = 8;
Simd128Register dst = i.OutputSimd128Register();
ASSEMBLE_LOAD_TRANSFORM(kScratchDoubleReg)
__ vxor(dst, dst, dst);
__ vinsertd(dst, kScratchDoubleReg, Operand(1 * lane_width_in_bytes));
break;
}
#undef ASSEMBLE_LOAD_TRANSFORM
case kPPC_StoreCompressTagged: {
ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedFieldX);
break;

View File

@ -379,6 +379,18 @@ namespace compiler {
V(PPC_S128Not) \
V(PPC_S128Select) \
V(PPC_S128AndNot) \
V(PPC_S128Load8Splat) \
V(PPC_S128Load16Splat) \
V(PPC_S128Load32Splat) \
V(PPC_S128Load64Splat) \
V(PPC_S128Load8x8S) \
V(PPC_S128Load8x8U) \
V(PPC_S128Load16x4S) \
V(PPC_S128Load16x4U) \
V(PPC_S128Load32x2S) \
V(PPC_S128Load32x2U) \
V(PPC_S128Load32Zero) \
V(PPC_S128Load64Zero) \
V(PPC_StoreCompressTagged) \
V(PPC_LoadDecompressTaggedSigned) \
V(PPC_LoadDecompressTaggedPointer) \

View File

@ -322,6 +322,18 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_LoadDecompressTaggedSigned:
case kPPC_LoadDecompressTaggedPointer:
case kPPC_LoadDecompressAnyTagged:
case kPPC_S128Load8Splat:
case kPPC_S128Load16Splat:
case kPPC_S128Load32Splat:
case kPPC_S128Load64Splat:
case kPPC_S128Load8x8S:
case kPPC_S128Load8x8U:
case kPPC_S128Load16x4S:
case kPPC_S128Load16x4U:
case kPPC_S128Load32x2S:
case kPPC_S128Load32x2U:
case kPPC_S128Load32Zero:
case kPPC_S128Load64Zero:
return kIsLoadOperation;
case kPPC_StoreWord8:

View File

@ -2483,7 +2483,56 @@ void InstructionSelector::EmitPrepareResults(
}
}
void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
// Instruction selection for SIMD load-transform nodes (the splat, extending
// and zeroing loads). The LoadTransformation carried by the node's operator is
// mapped one-to-one onto the identically named kPPC_* opcode, which is then
// emitted in kMode_MRR form with both the base (input 0) and the index
// (input 1) in registers. Any transformation not listed is UNREACHABLE().
void InstructionSelector::VisitLoadTransform(Node* node) {
// Name suffixes shared by LoadTransformation::k<Name> and kPPC_<Name>.
#define PPC_LOAD_TRANSFORM_LIST(V) \
  V(S128Load8Splat)                \
  V(S128Load16Splat)               \
  V(S128Load32Splat)               \
  V(S128Load64Splat)               \
  V(S128Load8x8S)                  \
  V(S128Load8x8U)                  \
  V(S128Load16x4S)                 \
  V(S128Load16x4U)                 \
  V(S128Load32x2S)                 \
  V(S128Load32x2U)                 \
  V(S128Load32Zero)                \
  V(S128Load64Zero)
#define PPC_LOAD_TRANSFORM_CASE(Name) \
  case LoadTransformation::k##Name:   \
    opcode = kPPC_##Name;             \
    break;

  LoadTransformParameters params = LoadTransformParametersOf(node->op());
  PPCOperandGenerator g(this);
  Node* const base_node = node->InputAt(0);
  Node* const index_node = node->InputAt(1);

  ArchOpcode opcode;
  switch (params.transformation) {
    PPC_LOAD_TRANSFORM_LIST(PPC_LOAD_TRANSFORM_CASE)
    default:
      UNREACHABLE();
  }
#undef PPC_LOAD_TRANSFORM_CASE
#undef PPC_LOAD_TRANSFORM_LIST

  Emit(opcode | AddressingModeField::encode(kMode_MRR),
       g.DefineAsRegister(node), g.UseRegister(base_node),
       g.UseRegister(index_node));
}
// static
MachineOperatorBuilder::Flags