[wasm-simd] Implement f32x4.sqrt for arm

Bug: v8:8460
Change-Id: I02f5ac42ab101dd8e12e14f253a625212db13a21
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1808045
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64011}
This commit is contained in:
Ng Zhi An 2019-09-25 14:58:07 -07:00 committed by Commit Bot
parent 160023c241
commit 36f2ec1fd8
6 changed files with 24 additions and 5 deletions

View File

@ -1798,6 +1798,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmF32x4Sqrt: {
QwNeonRegister dst = i.OutputSimd128Register();
QwNeonRegister src1 = i.InputSimd128Register(0);
DCHECK_EQ(dst, q0);
DCHECK_EQ(src1, q0);
#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
__ vsqrt(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0));
__ vsqrt(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1));
__ vsqrt(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2));
__ vsqrt(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3));
#undef S_FROM_Q
break;
}
case kArmF32x4RecipApprox: {
__ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;

View File

@ -135,6 +135,7 @@ namespace compiler {
V(ArmF32x4UConvertI32x4) \
V(ArmF32x4Abs) \
V(ArmF32x4Neg) \
V(ArmF32x4Sqrt) \
V(ArmF32x4RecipApprox) \
V(ArmF32x4RecipSqrtApprox) \
V(ArmF32x4Add) \

View File

@ -115,6 +115,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF32x4UConvertI32x4:
case kArmF32x4Abs:
case kArmF32x4Neg:
case kArmF32x4Sqrt:
case kArmF32x4RecipApprox:
case kArmF32x4RecipSqrtApprox:
case kArmF32x4Add:

View File

@ -2518,6 +2518,14 @@ SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
#undef SIMD_VISIT_BINOP
#undef SIMD_BINOP_LIST
void InstructionSelector::VisitF32x4Sqrt(Node* node) {
ArmOperandGenerator g(this);
// Use fixed registers in the lower 8 Q-registers so we can directly access
// mapped registers S0-S31.
Emit(kArmF32x4Sqrt, g.DefineAsFixed(node, q0),
g.UseFixed(node->InputAt(0), q0));
}
void InstructionSelector::VisitF32x4Div(Node* node) {
ArmOperandGenerator g(this);
// Use fixed registers in the lower 8 Q-registers so we can directly access

View File

@ -2661,9 +2661,6 @@ void InstructionSelector::VisitF64x2Qfma(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Qfms(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Qfma(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
#if !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_IA32
#endif // !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitI64x2MinS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2MaxS(Node* node) { UNIMPLEMENTED(); }

View File

@ -688,15 +688,14 @@ void RunF32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WASM_SIMD_TEST(F32x4Abs) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Abs, std::abs);
}
WASM_SIMD_TEST(F32x4Neg) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Neg, Negate);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(F32x4Sqrt) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Sqrt, Sqrt);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(F32x4RecipApprox) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4RecipApprox,