[wasm-simd] Implement i64x2 splat extract replace for arm

Bug: v8:9813
Change-Id: Ie99fdbf5307a1515a1838ac6902a5bcd99d11e14
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1900660
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64846}
This commit is contained in:
Ng Zhi An 2019-11-06 09:41:17 -08:00 committed by Commit Bot
parent dde3166beb
commit 5e514a9693
6 changed files with 37 additions and 2 deletions

View File

@ -1909,6 +1909,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vmov(i.OutputSimd128Register().high(), scratch, scratch);
break;
}
case kArmI64x2SplatI32Pair: {
Simd128Register dst = i.OutputSimd128Register();
__ vdup(Neon32, dst, i.InputRegister(0));
__ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 1);
__ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 3);
break;
}
case kArmI64x2ReplaceLaneI32Pair: {
Simd128Register dst = i.OutputSimd128Register();
int8_t lane = i.InputInt8(1);
__ ReplaceLane(dst, dst, i.InputRegister(2), NeonS32, lane * 2);
__ ReplaceLane(dst, dst, i.InputRegister(3), NeonS32, lane * 2 + 1);
break;
}
case kArmF32x4Splat: {
int src_code = i.InputFloatRegister(0).code();
__ vdup(Neon32, i.OutputSimd128Register(),

View File

@ -142,6 +142,8 @@ namespace compiler {
V(ArmF64x2Ne) \
V(ArmF64x2Lt) \
V(ArmF64x2Le) \
V(ArmI64x2SplatI32Pair) \
V(ArmI64x2ReplaceLaneI32Pair) \
V(ArmF32x4Splat) \
V(ArmF32x4ExtractLane) \
V(ArmF32x4ReplaceLane) \

View File

@ -122,6 +122,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF64x2Ne:
case kArmF64x2Lt:
case kArmF64x2Le:
case kArmI64x2SplatI32Pair:
case kArmI64x2ReplaceLaneI32Pair:
case kArmF32x4Splat:
case kArmF32x4ExtractLane:
case kArmF32x4ReplaceLane:

View File

@ -2573,6 +2573,23 @@ SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
#undef SIMD_VISIT_BINOP
#undef SIMD_BINOP_LIST
void InstructionSelector::VisitI64x2SplatI32Pair(Node* node) {
ArmOperandGenerator g(this);
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseRegister(node->InputAt(1));
Emit(kArmI64x2SplatI32Pair, g.DefineAsRegister(node), operand0, operand1);
}
void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
ArmOperandGenerator g(this);
InstructionOperand operand = g.UseRegister(node->InputAt(0));
InstructionOperand lane = g.UseImmediate(OpParameter<int32_t>(node->op()));
InstructionOperand low = g.UseRegister(node->InputAt(1));
InstructionOperand high = g.UseRegister(node->InputAt(2));
Emit(kArmI64x2ReplaceLaneI32Pair, g.DefineSameAsFirst(node), operand, lane,
low, high);
}
void InstructionSelector::VisitF32x4Sqrt(Node* node) {
ArmOperandGenerator g(this);
// Use fixed registers in the lower 8 Q-registers so we can directly access

View File

@ -2655,7 +2655,7 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_PPC
// !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_S390
#if !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM
// This is only needed on 32-bit to split the 64-bit value into two operands.
void InstructionSelector::VisitI64x2SplatI32Pair(Node* node) {
UNIMPLEMENTED();

View File

@ -894,7 +894,6 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Qfms) {
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(I64x2Splat) {
WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
// Set up a global to hold output vector.
@ -945,6 +944,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2ReplaceLane) {
}
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
void RunI64x2UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int64UnOp expected_op) {
WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);