S390 [simd]: Implement vector load and splat
This CL takes advantage of the z15 `load reverse and replicate` instruction to optimize Simd LoadSplat opcodes. On the simulator we only run `load replicate` as reversing is not required. We will need to implement the rest of the `load transform` ops before enabling this from wasm-compiler on BE machines.

Change-Id: I81ffedf51c3d35dbbc2a6455a2756cad25434127
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3115142
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#76490}
parent 409e02c1dd
commit 3489bdf8d1
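For context, a "load and splat" reads one element from memory and replicates it into every lane of a 128-bit vector; because Wasm memory is little-endian, doing this on s390x also needs a per-element byte reverse, which is what `vlbrrep` (VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE, guarded by VECTOR_ENHANCE_FACILITY_2 / z15 in this CL) performs in a single instruction. The following is a minimal, standalone C++ sketch of the intended semantics only; it is not V8 code, and the buffer/lane layout is an illustrative assumption:

// splat_semantics.cc -- illustrative sketch only, not part of this CL.
#include <cstdint>
#include <cstdio>

// Reads one little-endian element of width sizeof(T) from `mem` and
// replicates it across all 16 / sizeof(T) lanes of a 128-bit vector,
// mirroring what vlbrrep (z15) or LoadU*LE + vlvg + vrep achieve.
template <typename T>
void LoadAndSplatLE(const uint8_t* mem, T lanes[16 / sizeof(T)]) {
  T value = 0;
  for (unsigned i = 0; i < sizeof(T); ++i) {
    // Assemble the element in little-endian byte order.
    value = static_cast<T>(value | (static_cast<T>(mem[i]) << (8 * i)));
  }
  for (unsigned lane = 0; lane < 16 / sizeof(T); ++lane) lanes[lane] = value;
}

int main() {
  const uint8_t mem[8] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08};
  uint32_t lanes[4];
  LoadAndSplatLE(mem, lanes);  // every lane becomes 0x04030201
  std::printf("%08x %08x %08x %08x\n", (unsigned)lanes[0], (unsigned)lanes[1],
              (unsigned)lanes[2], (unsigned)lanes[3]);
}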
@@ -1559,8 +1559,10 @@ using SixByteInstr = uint64_t;
   V(vstef, VSTEF, 0xE70B) /* type = VRX VECTOR STORE ELEMENT (32) */          \
   V(vst, VST, 0xE70E)     /* type = VRX VECTOR STORE */                       \
   V(vlbr, VLBR, 0xE606)   /* type = VRX VECTOR LOAD BYTE REVERSED ELEMENTS */ \
-  V(vstbr, VSTBR, 0xE60E) /* type = VRX VECTOR STORE BYTE REVERSED ELEMENTS   \
-                           */
+  V(vstbr, VSTBR,                                                             \
+    0xE60E) /* type = VRX VECTOR STORE BYTE REVERSED ELEMENTS */              \
+  V(vlbrrep, VLBRREP,                                                         \
+    0xE605) /* type = VRX VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE */

 #define S390_RIE_G_OPCODE_LIST(V)                                             \
   V(lochi, LOCHI,                                                             \
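The opcode tables above follow V8's X-macro pattern: each V(name, NAME, value) entry is expanded by whatever macro is substituted for V. As a hedged illustration only (the list and consumer names below are hypothetical, not V8's actual macros), such a list can be expanded into an opcode enum and a name lookup:

// xmacro_demo.cc -- hypothetical consumer of a V(name, NAME, value) list.
#include <cstdint>
#include <cstdio>

#define DEMO_VRX_OPCODE_LIST(V) \
  V(vlbr, VLBR, 0xE606)         \
  V(vlbrrep, VLBRREP, 0xE605)   \
  V(vstbr, VSTBR, 0xE60E)

// One expansion produces an enum of opcode values...
enum DemoOpcode : uint16_t {
#define DECLARE_OPCODE(name, NAME, value) NAME = value,
  DEMO_VRX_OPCODE_LIST(DECLARE_OPCODE)
#undef DECLARE_OPCODE
};

// ...another produces a printable mnemonic for each entry.
const char* DemoOpcodeName(DemoOpcode op) {
  switch (op) {
#define OPCODE_CASE(name, NAME, value) \
  case NAME:                           \
    return #name;
    DEMO_VRX_OPCODE_LIST(OPCODE_CASE)
#undef OPCODE_CASE
  }
  return "unknown";
}

int main() {
  std::printf("%s = 0x%X\n", DemoOpcodeName(VLBRREP),
              static_cast<unsigned>(VLBRREP));
}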
@@ -3924,6 +3924,31 @@ void TurboAssembler::StoreV128LE(Simd128Register src, const MemOperand& mem,
   }
 }

+void TurboAssembler::LoadAndSplat8x16LE(Simd128Register dst,
+                                        const MemOperand& mem) {
+  vlrep(dst, mem, Condition(0));
+}
+#define LOAD_SPLAT_LIST(V) \
+  V(64x2, LoadU64LE, 3)    \
+  V(32x4, LoadU32LE, 2)    \
+  V(16x8, LoadU16LE, 1)
+
+#define LOAD_SPLAT(name, scalar_instr, condition)                      \
+  void TurboAssembler::LoadAndSplat##name##LE(Simd128Register dst,     \
+                                              const MemOperand& mem) { \
+    if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) &&         \
+        is_uint12(mem.offset())) {                                     \
+      vlbrrep(dst, mem, Condition(condition));                         \
+      return;                                                          \
+    }                                                                  \
+    scalar_instr(r1, mem);                                             \
+    vlvg(dst, r1, MemOperand(r0, 0), Condition(condition));            \
+    vrep(dst, dst, Operand(0), Condition(condition));                  \
+  }
+LOAD_SPLAT_LIST(LOAD_SPLAT)
+#undef LOAD_SPLAT
+#undef LOAD_SPLAT_LIST
+
 #else
 void TurboAssembler::LoadU64LE(Register dst, const MemOperand& mem,
                                Register scratch) {
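To make the macro above easier to read, here is roughly what one instantiation, LOAD_SPLAT(64x2, LoadU64LE, 3), expands to (manually expanded for illustration; whitespace and exact formatting are approximate):

// Approximate expansion of LOAD_SPLAT(64x2, LoadU64LE, 3) from the list above.
void TurboAssembler::LoadAndSplat64x2LE(Simd128Register dst,
                                        const MemOperand& mem) {
  // Fast path: with vector-enhancements facility 2 (z15), vlbrrep byte-reverses
  // the element and replicates it into every doubleword lane in one
  // instruction, provided the displacement fits the 12-bit VRX field.
  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) &&
      is_uint12(mem.offset())) {
    vlbrrep(dst, mem, Condition(3));
    return;
  }
  // Fallback: reversed scalar load into r1, insert into lane 0 of the vector
  // register, then replicate lane 0 across the whole vector.
  LoadU64LE(r1, mem);
  vlvg(dst, r1, MemOperand(r0, 0), Condition(3));
  vrep(dst, dst, Operand(0), Condition(3));
}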
@@ -3996,6 +4021,21 @@ void TurboAssembler::StoreV128LE(Simd128Register src, const MemOperand& mem,
   StoreV128(src, mem, scratch1);
 }

+#define LOAD_SPLAT_LIST(V) \
+  V(64x2, 3)               \
+  V(32x4, 2)               \
+  V(16x8, 1)               \
+  V(8x16, 0)
+
+#define LOAD_SPLAT(name, condition)                                    \
+  void TurboAssembler::LoadAndSplat##name##LE(Simd128Register dst,     \
+                                              const MemOperand& mem) { \
+    vlrep(dst, mem, Condition(condition));                             \
+  }
+LOAD_SPLAT_LIST(LOAD_SPLAT)
+#undef LOAD_SPLAT
+#undef LOAD_SPLAT_LIST
+
 #endif

 // Load And Test (Reg <- Reg)
@@ -392,6 +392,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
                   Register scratch1);
   void LoadF64LE(DoubleRegister dst, const MemOperand& opnd, Register scratch);
   void LoadF32LE(DoubleRegister dst, const MemOperand& opnd, Register scratch);
+  void LoadAndSplat64x2LE(Simd128Register dst, const MemOperand& mem);
+  void LoadAndSplat32x4LE(Simd128Register dst, const MemOperand& mem);
+  void LoadAndSplat16x8LE(Simd128Register dst, const MemOperand& mem);
+  void LoadAndSplat8x16LE(Simd128Register dst, const MemOperand& mem);

   // Load And Test
   void LoadAndTest32(Register dst, Register src);
@@ -2218,6 +2218,28 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vl(i.OutputSimd128Register(), operand, Condition(0));
       break;
     }
+#define LOAD_SPLAT(type)                             \
+  AddressingMode mode = kMode_None;                  \
+  MemOperand operand = i.MemoryOperand(&mode);       \
+  Simd128Register dst = i.OutputSimd128Register();   \
+  __ LoadAndSplat##type##LE(dst, operand);
+    case kS390_S128Load8Splat: {
+      LOAD_SPLAT(8x16);
+      break;
+    }
+    case kS390_S128Load16Splat: {
+      LOAD_SPLAT(16x8);
+      break;
+    }
+    case kS390_S128Load32Splat: {
+      LOAD_SPLAT(32x4);
+      break;
+    }
+    case kS390_S128Load64Splat: {
+      LOAD_SPLAT(64x2);
+      break;
+    }
+#undef LOAD_SPLAT
     case kS390_StoreWord8:
       ASSEMBLE_STORE_INTEGER(StoreU8);
       break;
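For readability, here is roughly what the kS390_S128Load8Splat case looks like once the LOAD_SPLAT macro above is expanded (manual expansion of the code in this hunk; formatting approximate):

// Approximate expansion of LOAD_SPLAT(8x16) inside AssembleArchInstruction.
case kS390_S128Load8Splat: {
  AddressingMode mode = kMode_None;
  MemOperand operand = i.MemoryOperand(&mode);       // decode the memory operand
  Simd128Register dst = i.OutputSimd128Register();   // destination SIMD register
  __ LoadAndSplat8x16LE(dst, operand);               // emit the load-and-splat
  break;
}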
@@ -372,6 +372,10 @@ namespace compiler {
   V(S390_S128Not)               \
   V(S390_S128Select)            \
   V(S390_S128AndNot)            \
+  V(S390_S128Load8Splat)        \
+  V(S390_S128Load16Splat)       \
+  V(S390_S128Load32Splat)       \
+  V(S390_S128Load64Splat)       \
   V(S390_StoreSimd128)          \
   V(S390_LoadSimd128)           \
   V(S390_StoreCompressTagged)   \
@@ -359,6 +359,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kS390_LoadDecompressTaggedSigned:
     case kS390_LoadDecompressTaggedPointer:
     case kS390_LoadDecompressAnyTagged:
+    case kS390_S128Load8Splat:
+    case kS390_S128Load16Splat:
+    case kS390_S128Load32Splat:
+    case kS390_S128Load64Splat:
       return kIsLoadOperation;

     case kS390_StoreWord8:
@@ -2793,8 +2793,25 @@ void InstructionSelector::VisitLoadLane(Node* node) {
 }

 void InstructionSelector::VisitLoadTransform(Node* node) {
-  // We should never reach here, see http://crrev.com/c/2050811
-  UNREACHABLE();
+  LoadTransformParameters params = LoadTransformParametersOf(node->op());
+  ArchOpcode opcode;
+  switch (params.transformation) {
+    case LoadTransformation::kS128Load8Splat:
+      opcode = kS390_S128Load8Splat;
+      break;
+    case LoadTransformation::kS128Load16Splat:
+      opcode = kS390_S128Load16Splat;
+      break;
+    case LoadTransformation::kS128Load32Splat:
+      opcode = kS390_S128Load32Splat;
+      break;
+    case LoadTransformation::kS128Load64Splat:
+      opcode = kS390_S128Load64Splat;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  VisitLoad(node, node, opcode);
 }

 void InstructionSelector::VisitStoreLane(Node* node) {