[wasm-simd][arm] Implement double precision conversion
Prototype these 6 instructions on arm:

- f64x2.convert_low_i32x4_s
- f64x2.convert_low_i32x4_u
- i32x4.trunc_sat_f64x2_s_zero
- i32x4.trunc_sat_f64x2_u_zero
- f32x4.demote_f64x2_zero
- f64x2.promote_low_f32x4

For all these instructions we rely on having Q registers that map to
S registers, which means we can only use q0 to q7. We fix the src/dst
to q0 arbitrarily (the register-numbering arithmetic is sketched after
the commit metadata below).

Bug: v8:11265
Change-Id: Ied95f2dde9859a60fc216ed67615f80e9d795bb7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2679842
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72567}
parent a723767935
commit 3b6eb33543
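Why only q0-q7: on ARM, the quad registers q0-q15 alias the double registers (qN = d2N:d2N+1), but the single-precision registers s0-s31 only alias d0-d15, that is q0-q7; q8-q15 have no S-register view. A minimal standalone C++ sketch of the register-numbering arithmetic the code generator below relies on (the helper name is illustrative, not a V8 API):

#include <cassert>

// Code of the S register that holds 32-bit lane `lane` of q<q_code>.
// Only q0-q7 alias S registers, hence the assert; this is the same
// arithmetic as src.code() * 4 (+ 1) in the code generator below.
int SRegisterCodeOfQLane(int q_code, int lane) {
  assert(q_code >= 0 && q_code < 8);  // q8-q15 have no S-register alias
  assert(lane >= 0 && lane < 4);      // four 32-bit lanes per Q register
  return q_code * 4 + lane;
}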
--- a/src/compiler/backend/arm/code-generator-arm.cc
+++ b/src/compiler/backend/arm/code-generator-arm.cc
@@ -2065,6 +2065,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vrintn(dst.high(), src.high());
       break;
     }
+    case kArmF64x2ConvertLowI32x4S: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vcvt_f64_s32(dst.low(), SwVfpRegister::from_code(src.code() * 4));
+      __ vcvt_f64_s32(dst.high(), SwVfpRegister::from_code(src.code() * 4 + 1));
+      break;
+    }
+    case kArmF64x2ConvertLowI32x4U: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vcvt_f64_u32(dst.low(), SwVfpRegister::from_code(src.code() * 4));
+      __ vcvt_f64_u32(dst.high(), SwVfpRegister::from_code(src.code() * 4 + 1));
+      break;
+    }
+    case kArmF64x2PromoteLowF32x4: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vcvt_f64_f32(dst.low(), SwVfpRegister::from_code(src.code() * 4));
+      __ vcvt_f64_f32(dst.high(), SwVfpRegister::from_code(src.code() * 4 + 1));
+      break;
+    }
     case kArmI64x2SplatI32Pair: {
       Simd128Register dst = i.OutputSimd128Register();
       __ vdup(Neon32, dst, i.InputRegister(0));
@@ -2339,6 +2360,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vbsl(dst, rhs, lhs);
       break;
     }
+    case kArmF32x4DemoteF64x2Zero: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vcvt_f32_f64(SwVfpRegister::from_code(dst.code() * 4), src.low());
+      __ vcvt_f32_f64(SwVfpRegister::from_code(dst.code() * 4 + 1), src.high());
+      __ vmov(dst.high(), 0);
+      break;
+    }
     case kArmI32x4Splat: {
       __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
       break;
@@ -2506,6 +2535,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vpadd(Neon32, dst.high(), scratch.low(), scratch.high());
       break;
     }
+    case kArmI32x4TruncSatF64x2SZero: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vcvt_s32_f64(SwVfpRegister::from_code(dst.code() * 4), src.low());
+      __ vcvt_s32_f64(SwVfpRegister::from_code(dst.code() * 4 + 1), src.high());
+      __ vmov(dst.high(), 0);
+      break;
+    }
+    case kArmI32x4TruncSatF64x2UZero: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vcvt_u32_f64(SwVfpRegister::from_code(dst.code() * 4), src.low());
+      __ vcvt_u32_f64(SwVfpRegister::from_code(dst.code() * 4 + 1), src.high());
+      __ vmov(dst.high(), 0);
+      break;
+    }
     case kArmI16x8Splat: {
       __ vdup(Neon16, i.OutputSimd128Register(), i.InputRegister(0));
       break;
--- a/src/compiler/backend/arm/instruction-codes-arm.h
+++ b/src/compiler/backend/arm/instruction-codes-arm.h
@@ -154,6 +154,9 @@ namespace compiler {
   V(ArmF64x2Floor) \
   V(ArmF64x2Trunc) \
   V(ArmF64x2NearestInt) \
+  V(ArmF64x2ConvertLowI32x4S) \
+  V(ArmF64x2ConvertLowI32x4U) \
+  V(ArmF64x2PromoteLowF32x4) \
   V(ArmF32x4Splat) \
   V(ArmF32x4ExtractLane) \
   V(ArmF32x4ReplaceLane) \
@@ -177,6 +180,7 @@ namespace compiler {
   V(ArmF32x4Le) \
   V(ArmF32x4Pmin) \
   V(ArmF32x4Pmax) \
+  V(ArmF32x4DemoteF64x2Zero) \
   V(ArmI64x2SplatI32Pair) \
   V(ArmI64x2ReplaceLaneI32Pair) \
   V(ArmI64x2Neg) \
@@ -222,6 +226,8 @@ namespace compiler {
   V(ArmI32x4Abs) \
   V(ArmI32x4BitMask) \
   V(ArmI32x4DotI16x8S) \
+  V(ArmI32x4TruncSatF64x2SZero) \
+  V(ArmI32x4TruncSatF64x2UZero) \
   V(ArmI16x8Splat) \
   V(ArmI16x8ExtractLaneS) \
   V(ArmI16x8ReplaceLane) \
--- a/src/compiler/backend/arm/instruction-scheduler-arm.cc
+++ b/src/compiler/backend/arm/instruction-scheduler-arm.cc
@@ -134,6 +134,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kArmF64x2Floor:
    case kArmF64x2Trunc:
    case kArmF64x2NearestInt:
+   case kArmF64x2ConvertLowI32x4S:
+   case kArmF64x2ConvertLowI32x4U:
+   case kArmF64x2PromoteLowF32x4:
    case kArmF32x4Splat:
    case kArmF32x4ExtractLane:
    case kArmF32x4ReplaceLane:
@@ -157,6 +160,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kArmF32x4Le:
    case kArmF32x4Pmin:
    case kArmF32x4Pmax:
+   case kArmF32x4DemoteF64x2Zero:
    case kArmI64x2SplatI32Pair:
    case kArmI64x2ReplaceLaneI32Pair:
    case kArmI64x2Neg:
@@ -202,6 +206,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kArmI32x4Abs:
    case kArmI32x4BitMask:
    case kArmI32x4DotI16x8S:
+   case kArmI32x4TruncSatF64x2SZero:
+   case kArmI32x4TruncSatF64x2UZero:
    case kArmI16x8Splat:
    case kArmI16x8ExtractLaneS:
    case kArmI16x8ReplaceLane:
--- a/src/compiler/backend/arm/instruction-selector-arm.cc
+++ b/src/compiler/backend/arm/instruction-selector-arm.cc
@@ -3149,6 +3149,45 @@ void InstructionSelector::VisitTruncateFloat32ToUint32(Node* node) {
   Emit(opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
 }

+// TODO(v8:9780)
+// These double precision conversion instructions need a low Q register (q0-q7)
+// because the codegen accesses the S registers they overlap with.
+void InstructionSelector::VisitF64x2ConvertLowI32x4S(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmF64x2ConvertLowI32x4S, g.DefineAsRegister(node),
+       g.UseFixed(node->InputAt(0), q0));
+}
+
+void InstructionSelector::VisitF64x2ConvertLowI32x4U(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmF64x2ConvertLowI32x4U, g.DefineAsRegister(node),
+       g.UseFixed(node->InputAt(0), q0));
+}
+
+void InstructionSelector::VisitI32x4TruncSatF64x2SZero(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmI32x4TruncSatF64x2SZero, g.DefineAsFixed(node, q0),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmI32x4TruncSatF64x2UZero, g.DefineAsFixed(node, q0),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitF32x4DemoteF64x2Zero(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmF32x4DemoteF64x2Zero, g.DefineAsFixed(node, q0),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitF64x2PromoteLowF32x4(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmF64x2PromoteLowF32x4, g.DefineAsRegister(node),
+       g.UseFixed(node->InputAt(0), q0));
+}
+
 // static
 MachineOperatorBuilder::Flags
 InstructionSelector::SupportedMachineOperatorFlags() {
--- a/src/compiler/backend/instruction-selector.cc
+++ b/src/compiler/backend/instruction-selector.cc
@@ -2787,27 +2787,6 @@ void InstructionSelector::VisitI64x2SignSelect(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
         // && !V8_TARGET_ARCH_ARM

-#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
-void InstructionSelector::VisitF64x2ConvertLowI32x4S(Node* node) {
-  UNIMPLEMENTED();
-}
-void InstructionSelector::VisitF64x2ConvertLowI32x4U(Node* node) {
-  UNIMPLEMENTED();
-}
-void InstructionSelector::VisitF64x2PromoteLowF32x4(Node* node) {
-  UNIMPLEMENTED();
-}
-void InstructionSelector::VisitF32x4DemoteF64x2Zero(Node* node) {
-  UNIMPLEMENTED();
-}
-void InstructionSelector::VisitI32x4TruncSatF64x2SZero(Node* node) {
-  UNIMPLEMENTED();
-}
-void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
-  UNIMPLEMENTED();
-}
-#endif  //! V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
-
 #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
 // TODO(v8:11297) Prototype i32x4.widen_i8x16_u
 void InstructionSelector::VisitI32x4WidenI8x16S(Node* node) { UNIMPLEMENTED(); }
--- a/test/cctest/wasm/test-run-wasm-simd.cc
+++ b/test/cctest/wasm/test-run-wasm-simd.cc
@@ -1278,8 +1278,6 @@ WASM_SIMD_TEST(F64x2NearestInt) {
                         true);
 }

-// TODO(v8:11265): Prototyping double precision conversions.
-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
 template <typename SrcType>
 void RunF64x2ConvertLowI32x4Test(TestExecutionTier execution_tier,
                                  LowerSimd lower_simd, WasmOpcode opcode) {
@@ -1396,7 +1394,6 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2PromoteLowF32x4) {
     }
   }
 }
-#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32

 void RunF64x2BinOpTest(TestExecutionTier execution_tier, LowerSimd lower_simd,
                        WasmOpcode opcode, DoubleBinOp expected_op) {
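For reference, the lane-wise semantics these tests exercise, written as scalar C++ (a sketch following the wasm SIMD proposal; the function names are illustrative, not V8 APIs). The "_zero" instructions clear the two high output lanes, which is what __ vmov(dst.high(), 0) in the code generator implements; the VFP vcvt conversions already saturate and map NaN to zero, so the codegen needs no extra clamping.

#include <cmath>
#include <cstdint>
#include <limits>

// f64x2.convert_low_i32x4_s / _u: widen the two low i32 lanes to f64.
double F64x2ConvertLowS(int32_t lane) { return static_cast<double>(lane); }
double F64x2ConvertLowU(uint32_t lane) { return static_cast<double>(lane); }

// f64x2.promote_low_f32x4: promote the two low f32 lanes to f64 (exact).
double F64x2PromoteLow(float lane) { return static_cast<double>(lane); }

// f32x4.demote_f64x2_zero: round each f64 lane to f32; high lanes zero.
float F32x4DemoteZero(double lane) { return static_cast<float>(lane); }

// i32x4.trunc_sat_f64x2_s_zero: truncate toward zero with saturation and
// NaN -> 0; high lanes zero. The unsigned form clamps to [0, UINT32_MAX].
int32_t I32x4TruncSatSZero(double lane) {
  if (std::isnan(lane)) return 0;
  if (lane <= static_cast<double>(std::numeric_limits<int32_t>::min()))
    return std::numeric_limits<int32_t>::min();
  if (lane >= static_cast<double>(std::numeric_limits<int32_t>::max()))
    return std::numeric_limits<int32_t>::max();
  return static_cast<int32_t>(lane);  // truncation toward zero
}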