Fixed ARM-NEON GCC conformance issue with half-precision optimizations on ARM64

2024-11-24 21:20:13 +00:00 · 2021-01-10 20:40:17 -08:00 · 2021-01-10 20:40:17 -08:00 · 5a5d2bdea0
commit 5a5d2bdea0
parent 5d152671db
2 changed files with 9 additions and 9 deletions
--- a/Inc/DirectXPackedVector.inl
+++ b/Inc/DirectXPackedVector.inl
@@ -644,13 +644,13 @@ inline HALF* XMConvertFloatToHalfStream

                    uint16x4_t vHalf = vreinterpret_u16_f16(vcvt_f16_f32(vFloat));

-                    vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 0);
+                    vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 0);
                    pHalf += OutputStride;
-                    vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 1);
+                    vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 1);
                    pHalf += OutputStride;
-                    vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 2);
+                    vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 2);
                    pHalf += OutputStride;
-                    vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 3);
+                    vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 3);
                    pHalf += OutputStride;
                    i += 4;
                }
@@ -701,13 +701,13 @@ inline HALF* XMConvertFloatToHalfStream

                uint16x4_t vHalf = vreinterpret_u16_f16(vcvt_f16_f32(vFloat));

-                vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 0);
+                vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 0);
                pHalf += OutputStride;
-                vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 1);
+                vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 1);
                pHalf += OutputStride;
-                vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 2);
+                vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 2);
                pHalf += OutputStride;
-                vst1_lane_u16(reinterpret_cast<float*>(pHalf), vHalf, 3);
+                vst1_lane_u16(reinterpret_cast<uint16_t*>(pHalf), vHalf, 3);
                pHalf += OutputStride;
                i += 4;
            }
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
                               The MIT License (MIT)

-Copyright (c) 2011-2020 Microsoft Corp
+Copyright (c) 2011-2021 Microsoft Corp

 Permission is hereby granted, free of charge, to any person obtaining a copy of this 
 software and associated documentation files (the "Software"), to deal in the Software