[wasm-simd] Implement load splat and extends on arm64
Bug: v8:9886 Change-Id: I88a4364596ef529c3873f4c80f36e0bfbe71e022 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1919695 Reviewed-by: Bill Budge <bbudge@chromium.org> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/master@{#65045}
This commit is contained in:
parent
f067ed8315
commit
a8c28fa1bc
@ -2495,6 +2495,52 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Add(i.OutputRegister32(), i.OutputRegister32(), 1);
|
||||
break;
|
||||
}
|
||||
case kArm64S8x16LoadSplat: {
|
||||
__ ld1r(i.OutputSimd128Register().V16B(), i.MemoryOperand(0));
|
||||
break;
|
||||
}
|
||||
case kArm64S16x8LoadSplat: {
|
||||
__ ld1r(i.OutputSimd128Register().V8H(), i.MemoryOperand(0));
|
||||
break;
|
||||
}
|
||||
case kArm64S32x4LoadSplat: {
|
||||
__ ld1r(i.OutputSimd128Register().V4S(), i.MemoryOperand(0));
|
||||
break;
|
||||
}
|
||||
case kArm64S64x2LoadSplat: {
|
||||
__ ld1r(i.OutputSimd128Register().V2D(), i.MemoryOperand(0));
|
||||
break;
|
||||
}
|
||||
case kArm64I16x8Load8x8S: {
|
||||
__ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
|
||||
__ Sxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
|
||||
break;
|
||||
}
|
||||
case kArm64I16x8Load8x8U: {
|
||||
__ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
|
||||
__ Uxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
|
||||
break;
|
||||
}
|
||||
case kArm64I32x4Load16x4S: {
|
||||
__ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
|
||||
__ Sxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
|
||||
break;
|
||||
}
|
||||
case kArm64I32x4Load16x4U: {
|
||||
__ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
|
||||
__ Uxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
|
||||
break;
|
||||
}
|
||||
case kArm64I64x2Load32x2S: {
|
||||
__ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
|
||||
__ Sxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
|
||||
break;
|
||||
}
|
||||
case kArm64I64x2Load32x2U: {
|
||||
__ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
|
||||
__ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
|
||||
break;
|
||||
}
|
||||
#define SIMD_REDUCE_OP_CASE(Op, Instr, format, FORMAT) \
|
||||
case Op: { \
|
||||
UseScratchRegisterScope scope(tasm()); \
|
||||
|
@ -355,6 +355,16 @@ namespace compiler {
|
||||
V(Arm64S1x8AllTrue) \
|
||||
V(Arm64S1x16AnyTrue) \
|
||||
V(Arm64S1x16AllTrue) \
|
||||
V(Arm64S8x16LoadSplat) \
|
||||
V(Arm64S16x8LoadSplat) \
|
||||
V(Arm64S32x4LoadSplat) \
|
||||
V(Arm64S64x2LoadSplat) \
|
||||
V(Arm64I16x8Load8x8S) \
|
||||
V(Arm64I16x8Load8x8U) \
|
||||
V(Arm64I32x4Load16x4S) \
|
||||
V(Arm64I32x4Load16x4U) \
|
||||
V(Arm64I64x2Load32x2S) \
|
||||
V(Arm64I64x2Load32x2U) \
|
||||
V(Arm64Word64AtomicLoadUint8) \
|
||||
V(Arm64Word64AtomicLoadUint16) \
|
||||
V(Arm64Word64AtomicLoadUint32) \
|
||||
|
@ -345,6 +345,16 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kArm64LdrDecompressTaggedPointer:
|
||||
case kArm64LdrDecompressAnyTagged:
|
||||
case kArm64Peek:
|
||||
case kArm64S8x16LoadSplat:
|
||||
case kArm64S16x8LoadSplat:
|
||||
case kArm64S32x4LoadSplat:
|
||||
case kArm64S64x2LoadSplat:
|
||||
case kArm64I16x8Load8x8S:
|
||||
case kArm64I16x8Load8x8U:
|
||||
case kArm64I32x4Load16x4S:
|
||||
case kArm64I32x4Load16x4U:
|
||||
case kArm64I64x2Load32x2S:
|
||||
case kArm64I64x2Load32x2U:
|
||||
return kIsLoadOperation;
|
||||
|
||||
case kArm64Claim:
|
||||
|
@ -592,6 +592,66 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode,
|
||||
selector->Emit(opcode, arraysize(outputs), outputs, input_count, inputs);
|
||||
}
|
||||
|
||||
// Selects the arm64 instruction for a Wasm load-transform (load-splat /
// load-extend) node: picks the matching kArm64* opcode, then emits an
// explicit base+index add followed by the load with a zero immediate
// offset, because ld1r only supports post-index addressing.
void InstructionSelector::VisitLoadTransform(Node* node) {
  LoadTransformParameters params = LoadTransformParametersOf(node->op());
  InstructionCode opcode = kArchNop;
  switch (params.transformation) {
    // Each transformation maps 1:1 onto an identically named arm64 opcode.
#define LOAD_TRANSFORM_CASE(Name)  \
  case LoadTransformation::k##Name: \
    opcode = kArm64##Name;          \
    break;
    LOAD_TRANSFORM_CASE(S8x16LoadSplat)
    LOAD_TRANSFORM_CASE(S16x8LoadSplat)
    LOAD_TRANSFORM_CASE(S32x4LoadSplat)
    LOAD_TRANSFORM_CASE(S64x2LoadSplat)
    LOAD_TRANSFORM_CASE(I16x8Load8x8S)
    LOAD_TRANSFORM_CASE(I16x8Load8x8U)
    LOAD_TRANSFORM_CASE(I32x4Load16x4S)
    LOAD_TRANSFORM_CASE(I32x4Load16x4U)
    LOAD_TRANSFORM_CASE(I64x2Load32x2S)
    LOAD_TRANSFORM_CASE(I64x2Load32x2U)
#undef LOAD_TRANSFORM_CASE
    default:
      UNIMPLEMENTED();
  }
  // ARM64 supports unaligned loads
  DCHECK_NE(params.kind, LoadKind::kUnaligned);

  Arm64OperandGenerator gen(this);
  InstructionOperand inputs[2] = {gen.UseRegister(node->InputAt(0)),
                                  gen.UseRegister(node->InputAt(1))};
  InstructionOperand outputs[1] = {gen.DefineAsRegister(node)};

  // ld1r uses post-index, so construct address first.
  // TODO(v8:9886) If index can be immediate, use vldr without this add.
  InstructionOperand address = gen.TempRegister();
  Emit(kArm64Add, 1, &address, 2, inputs);
  inputs[0] = address;
  inputs[1] = gen.TempImmediate(0);
  opcode |= AddressingModeField::encode(kMode_MRI);
  Emit(opcode, 1, outputs, 2, inputs);
}
|
||||
|
||||
void InstructionSelector::VisitLoad(Node* node) {
|
||||
InstructionCode opcode = kArchNop;
|
||||
ImmediateMode immediate_mode = kNoImmediate;
|
||||
|
@ -2614,7 +2614,6 @@ void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
|
||||
#endif // !V8_TARGET_ARCH_IA32
|
||||
|
||||
#if !V8_TARGET_ARCH_X64
|
||||
void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitF64x2SConvertI64x2(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
@ -2622,6 +2621,7 @@ void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
#if !V8_TARGET_ARCH_ARM64
|
||||
void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
|
||||
#if !V8_TARGET_ARCH_IA32
|
||||
void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
|
||||
void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
|
||||
|
@ -3273,7 +3273,7 @@ WASM_SIMD_TEST(SimdLoadStoreLoadMemargOffset) {
|
||||
}
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_X64
|
||||
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
|
||||
template <typename T>
|
||||
void RunLoadSplatTest(ExecutionTier execution_tier, LowerSimd lower_simd,
|
||||
WasmOpcode op) {
|
||||
@ -3352,7 +3352,6 @@ WASM_SIMD_TEST_NO_LOWERING(I16x8Load8x8S) {
|
||||
RunLoadExtendTest<int8_t, int16_t>(execution_tier, lower_simd,
|
||||
kExprI16x8Load8x8S);
|
||||
}
|
||||
|
||||
WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4U) {
|
||||
RunLoadExtendTest<uint16_t, uint32_t>(execution_tier, lower_simd,
|
||||
kExprI32x4Load16x4U);
|
||||
@ -3372,7 +3371,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2S) {
|
||||
RunLoadExtendTest<int32_t, int64_t>(execution_tier, lower_simd,
|
||||
kExprI64x2Load32x2S);
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_X64
|
||||
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
|
||||
|
||||
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
|
||||
V8_TARGET_ARCH_ARM
|
||||
|
Loading…
Reference in New Issue
Block a user