[wasm-simd][arm64] Prototype double precision conversion
Prototype these 6 instructions on arm64:
- f64x2.convert_low_i32x4_s
- f64x2.convert_low_i32x4_u
- i32x4.trunc_sat_f64x2_s_zero
- i32x4.trunc_sat_f64x2_u_zero
- f32x4.demote_f64x2_zero
- f64x2.promote_low_f32x4

Drive-by fixes:
- The f64x2.promote_low_f32x4 test accessed the global out of bounds; the result only has 2 doubles.
- fcvtn in the simulator needs to clear the top bits of the Q register.

Bug: v8:11265
Change-Id: Icfb3338942f0d0374448fdcfef3847a6e3ce8ff6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2644066
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72373}
This commit is contained in:
parent
cf380f5965
commit
dfe7c465d3
@ -261,8 +261,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
V(faddp, Faddp) \
|
||||
V(fcvtas, Fcvtas) \
|
||||
V(fcvtau, Fcvtau) \
|
||||
V(fcvtl, Fcvtl) \
|
||||
V(fcvtms, Fcvtms) \
|
||||
V(fcvtmu, Fcvtmu) \
|
||||
V(fcvtn, Fcvtn) \
|
||||
V(fcvtns, Fcvtns) \
|
||||
V(fcvtnu, Fcvtnu) \
|
||||
V(fcvtps, Fcvtps) \
|
||||
|
@ -1955,6 +1955,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
i.InputSimd128Register(0).Format(narrow));
|
||||
break;
|
||||
}
|
||||
case kArm64F64x2ConvertLowI32x4S: {
|
||||
VRegister dst = i.OutputSimd128Register().V2D();
|
||||
__ Sxtl(dst, i.InputSimd128Register(0).V2S());
|
||||
__ Scvtf(dst, dst);
|
||||
break;
|
||||
}
|
||||
case kArm64F64x2ConvertLowI32x4U: {
|
||||
VRegister dst = i.OutputSimd128Register().V2D();
|
||||
__ Uxtl(dst, i.InputSimd128Register(0).V2S());
|
||||
__ Ucvtf(dst, dst);
|
||||
break;
|
||||
}
|
||||
case kArm64I32x4TruncSatF64x2SZero: {
|
||||
VRegister dst = i.OutputSimd128Register();
|
||||
__ Fcvtzs(dst.V2D(), i.InputSimd128Register(0).V2D());
|
||||
__ Sqxtn(dst.V2S(), dst.V2D());
|
||||
break;
|
||||
}
|
||||
case kArm64I32x4TruncSatF64x2UZero: {
|
||||
VRegister dst = i.OutputSimd128Register();
|
||||
__ Fcvtzu(dst.V2D(), i.InputSimd128Register(0).V2D());
|
||||
__ Uqxtn(dst.V2S(), dst.V2D());
|
||||
break;
|
||||
}
|
||||
case kArm64F32x4DemoteF64x2Zero: {
|
||||
__ Fcvtn(i.OutputSimd128Register().V2S(),
|
||||
i.InputSimd128Register(0).V2D());
|
||||
break;
|
||||
}
|
||||
case kArm64F64x2PromoteLowF32x4: {
|
||||
__ Fcvtl(i.OutputSimd128Register().V2D(),
|
||||
i.InputSimd128Register(0).V2S());
|
||||
break;
|
||||
}
|
||||
case kArm64F64x2Splat: {
|
||||
__ Dup(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).D(), 0);
|
||||
break;
|
||||
|
@ -198,6 +198,9 @@ namespace compiler {
|
||||
V(Arm64F64x2Qfms) \
|
||||
V(Arm64F64x2Pmin) \
|
||||
V(Arm64F64x2Pmax) \
|
||||
V(Arm64F64x2ConvertLowI32x4S) \
|
||||
V(Arm64F64x2ConvertLowI32x4U) \
|
||||
V(Arm64F64x2PromoteLowF32x4) \
|
||||
V(Arm64F32x4Splat) \
|
||||
V(Arm64F32x4ExtractLane) \
|
||||
V(Arm64F32x4ReplaceLane) \
|
||||
@ -223,6 +226,7 @@ namespace compiler {
|
||||
V(Arm64F32x4Qfms) \
|
||||
V(Arm64F32x4Pmin) \
|
||||
V(Arm64F32x4Pmax) \
|
||||
V(Arm64F32x4DemoteF64x2Zero) \
|
||||
V(Arm64I64x2Splat) \
|
||||
V(Arm64I64x2ExtractLane) \
|
||||
V(Arm64I64x2ReplaceLane) \
|
||||
@ -263,6 +267,8 @@ namespace compiler {
|
||||
V(Arm64I32x4Abs) \
|
||||
V(Arm64I32x4BitMask) \
|
||||
V(Arm64I32x4DotI16x8S) \
|
||||
V(Arm64I32x4TruncSatF64x2SZero) \
|
||||
V(Arm64I32x4TruncSatF64x2UZero) \
|
||||
V(Arm64I16x8Splat) \
|
||||
V(Arm64I16x8ExtractLaneU) \
|
||||
V(Arm64I16x8ExtractLaneS) \
|
||||
|
@ -163,6 +163,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kArm64F64x2Qfms:
|
||||
case kArm64F64x2Pmin:
|
||||
case kArm64F64x2Pmax:
|
||||
case kArm64F64x2ConvertLowI32x4S:
|
||||
case kArm64F64x2ConvertLowI32x4U:
|
||||
case kArm64F64x2PromoteLowF32x4:
|
||||
case kArm64F32x4Splat:
|
||||
case kArm64F32x4ExtractLane:
|
||||
case kArm64F32x4ReplaceLane:
|
||||
@ -188,6 +191,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kArm64F32x4Qfms:
|
||||
case kArm64F32x4Pmin:
|
||||
case kArm64F32x4Pmax:
|
||||
case kArm64F32x4DemoteF64x2Zero:
|
||||
case kArm64I64x2Splat:
|
||||
case kArm64I64x2ExtractLane:
|
||||
case kArm64I64x2ReplaceLane:
|
||||
@ -232,6 +236,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kArm64I32x4Abs:
|
||||
case kArm64I32x4BitMask:
|
||||
case kArm64I32x4DotI16x8S:
|
||||
case kArm64I32x4TruncSatF64x2SZero:
|
||||
case kArm64I32x4TruncSatF64x2UZero:
|
||||
case kArm64I16x8Splat:
|
||||
case kArm64I16x8ExtractLaneU:
|
||||
case kArm64I16x8ExtractLaneS:
|
||||
|
@ -3403,6 +3403,9 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
|
||||
V(F64x2Abs, kArm64F64x2Abs) \
|
||||
V(F64x2Neg, kArm64F64x2Neg) \
|
||||
V(F64x2Sqrt, kArm64F64x2Sqrt) \
|
||||
V(F64x2ConvertLowI32x4S, kArm64F64x2ConvertLowI32x4S) \
|
||||
V(F64x2ConvertLowI32x4U, kArm64F64x2ConvertLowI32x4U) \
|
||||
V(F64x2PromoteLowF32x4, kArm64F64x2PromoteLowF32x4) \
|
||||
V(F32x4SConvertI32x4, kArm64F32x4SConvertI32x4) \
|
||||
V(F32x4UConvertI32x4, kArm64F32x4UConvertI32x4) \
|
||||
V(F32x4Abs, kArm64F32x4Abs) \
|
||||
@ -3410,6 +3413,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
|
||||
V(F32x4Sqrt, kArm64F32x4Sqrt) \
|
||||
V(F32x4RecipApprox, kArm64F32x4RecipApprox) \
|
||||
V(F32x4RecipSqrtApprox, kArm64F32x4RecipSqrtApprox) \
|
||||
V(F32x4DemoteF64x2Zero, kArm64F32x4DemoteF64x2Zero) \
|
||||
V(I64x2Neg, kArm64I64x2Neg) \
|
||||
V(I64x2BitMask, kArm64I64x2BitMask) \
|
||||
V(I32x4SConvertF32x4, kArm64I32x4SConvertF32x4) \
|
||||
@ -3417,6 +3421,8 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
|
||||
V(I32x4UConvertF32x4, kArm64I32x4UConvertF32x4) \
|
||||
V(I32x4Abs, kArm64I32x4Abs) \
|
||||
V(I32x4BitMask, kArm64I32x4BitMask) \
|
||||
V(I32x4TruncSatF64x2SZero, kArm64I32x4TruncSatF64x2SZero) \
|
||||
V(I32x4TruncSatF64x2UZero, kArm64I32x4TruncSatF64x2UZero) \
|
||||
V(I16x8Neg, kArm64I16x8Neg) \
|
||||
V(I16x8Abs, kArm64I16x8Abs) \
|
||||
V(I16x8BitMask, kArm64I16x8BitMask) \
|
||||
|
@ -2800,7 +2800,7 @@ void InstructionSelector::VisitI64x2SignSelect(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
|
||||
// && !V8_TARGET_ARCH_ARM
|
||||
|
||||
#if !V8_TARGET_ARCH_X64
|
||||
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
|
||||
// Placeholder visitor for the F64x2ConvertLowI32x4S node: it performs no
// instruction selection and unconditionally hits UNIMPLEMENTED().
// NOTE(review): which targets compile this stub is decided by the enclosing
// preprocessor guard — confirm against the #if that precedes it.
void InstructionSelector::VisitF64x2ConvertLowI32x4S(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
@ -2819,7 +2819,7 @@ void InstructionSelector::VisitI32x4TruncSatF64x2SZero(Node* node) {
|
||||
// Placeholder visitor for the I32x4TruncSatF64x2UZero node: it performs no
// instruction selection and unconditionally hits UNIMPLEMENTED().
// NOTE(review): which targets compile this stub is decided by the enclosing
// preprocessor guard — confirm against the #endif that follows it.
void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
#endif //! V8_TARGET_ARCH_X64
|
||||
#endif //! V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
|
||||
|
||||
#if !V8_TARGET_ARCH_X64
|
||||
// TODO(v8:11297) Prototype i32x4.widen_i8x16_u
|
||||
|
@ -3856,6 +3856,7 @@ LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
|
||||
dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
|
||||
}
|
||||
}
|
||||
dst.ClearForWrite(vform);
|
||||
return dst;
|
||||
}
|
||||
|
||||
|
@ -10888,6 +10888,26 @@ TEST(fcvtmu) {
|
||||
CHECK_EQUAL_64(0x0UL, x30);
|
||||
}
|
||||
|
||||
// Checks Fcvtn with a 2D source and a 2S destination: two doubles are narrowed
// to two floats, and the full 128-bit register is then compared so that the
// upper 64 bits are verified to be zero after the narrowing write.
TEST(fcvtn) {
|
||||
INIT_V8();
|
||||
SETUP();
|
||||
START();
|
||||
|
||||
// Both source lanes hold 1.0 (the float literals convert to double exactly).
double src[2] = {1.0f, 1.0f};
|
||||
// The array's address is materialized as an immediate for the load below.
uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
|
||||
|
||||
__ Mov(x0, src_base);
|
||||
// Fill all 128 bits of q0 with the two doubles, so the top half is non-zero
// before the conversion.
__ Ldr(q0, MemOperand(x0, 0));
|
||||
|
||||
// Narrow in place: source and destination are the same register.
__ Fcvtn(q0.V2S(), q0.V2D());
|
||||
|
||||
END();
|
||||
RUN();
|
||||
|
||||
// Ensure top half is cleared: the high 64 bits must be 0, and each of the two
// low 32-bit lanes must hold 0x3f800000 (1.0f).
|
||||
CHECK_EQUAL_128(0, 0x3f800000'3f800000, q0);
|
||||
}
|
||||
|
||||
TEST(fcvtns) {
|
||||
INIT_V8();
|
||||
SETUP();
|
||||
|
@ -1273,7 +1273,7 @@ WASM_SIMD_TEST(F64x2NearestInt) {
|
||||
}
|
||||
|
||||
// TODO(v8:11265): Prototyping double precision conversions.
|
||||
#if V8_TARGET_ARCH_X64
|
||||
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
|
||||
template <typename SrcType>
|
||||
void RunF64x2ConvertLowI32x4Test(TestExecutionTier execution_tier,
|
||||
LowerSimd lower_simd, WasmOpcode opcode) {
|
||||
@ -1388,13 +1388,9 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2PromoteLowF32x4) {
|
||||
double actual = ReadLittleEndianValue<double>(&g[i]);
|
||||
CheckDoubleResult(x, x, expected, actual, true);
|
||||
}
|
||||
for (int i = 2; i < 4; i++) {
|
||||
double actual = ReadLittleEndianValue<double>(&g[i]);
|
||||
CheckDoubleResult(x, x, 0, actual, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_X64
|
||||
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
|
||||
|
||||
void RunF64x2BinOpTest(TestExecutionTier execution_tier, LowerSimd lower_simd,
|
||||
WasmOpcode opcode, DoubleBinOp expected_op) {
|
||||
|
Loading…
Reference in New Issue
Block a user