1
0
mirror of https://github.com/microsoft/DirectXMath synced 2024-11-24 21:20:13 +00:00

Fixed ARM-NEON GCC conformance issue with half-precision optimizations on ARM64

This commit is contained in:
Chuck Walbourn 2021-01-10 20:40:17 -08:00
parent 5d152671db
commit 5a5d2bdea0
2 changed files with 9 additions and 9 deletions

View File

@@ -644,13 +644,13 @@ inline HALF* XMConvertFloatToHalfStream
uint16x4_t vHalf = vreinterpret_u16_f16(vcvt_f16_f32(vFloat));
vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 0);
vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 0);
pHalf += OutputStride;
vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 1);
vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 1);
pHalf += OutputStride;
vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 2);
vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 2);
pHalf += OutputStride;
vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 3);
vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 3);
pHalf += OutputStride;
i += 4;
}
@@ -701,13 +701,13 @@ inline HALF* XMConvertFloatToHalfStream
uint16x4_t vHalf = vreinterpret_u16_f16(vcvt_f16_f32(vFloat));
vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 0);
vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 0);
pHalf += OutputStride;
vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 1);
vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 1);
pHalf += OutputStride;
vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 2);
vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 2);
pHalf += OutputStride;
vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 3);
vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 3);
pHalf += OutputStride;
i += 4;
}

View File

@@ -1,6 +1,6 @@
The MIT License (MIT)
Copyright (c) 2011-2020 Microsoft Corp
Copyright (c) 2011-2021 Microsoft Corp
Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software