mirror of
https://github.com/microsoft/DirectXMath
synced 2024-11-08 13:40:09 +00:00
Improve support for /Zc:arm64-aliased-neon-types-
This commit is contained in:
parent
f639e2d8f5
commit
fc2763a681
@ -411,7 +411,7 @@ namespace DirectX
|
||||
#elif defined(_XM_SSE_INTRINSICS_)
|
||||
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
|
||||
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(__GNUC__)
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(__GNUC__) || defined(_ARM64_DISTINCT_NEON_TYPES))
|
||||
inline operator int32x4_t() const noexcept { return vreinterpretq_s32_f32(v); }
|
||||
inline operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); }
|
||||
#endif
|
||||
@ -430,7 +430,7 @@ namespace DirectX
|
||||
#elif defined(_XM_SSE_INTRINSICS_)
|
||||
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
|
||||
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(__GNUC__)
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(__GNUC__) || defined(_ARM64_DISTINCT_NEON_TYPES))
|
||||
inline operator int32x4_t() const noexcept { return vreinterpretq_s32_f32(v); }
|
||||
inline operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); }
|
||||
#endif
|
||||
@ -449,7 +449,7 @@ namespace DirectX
|
||||
#elif defined(_XM_SSE_INTRINSICS_)
|
||||
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
|
||||
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(__GNUC__)
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(__GNUC__) || defined(_ARM64_DISTINCT_NEON_TYPES))
|
||||
inline operator int32x4_t() const noexcept { return vreinterpretq_s32_f32(v); }
|
||||
inline operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); }
|
||||
#endif
|
||||
@ -468,7 +468,7 @@ namespace DirectX
|
||||
#elif defined(_XM_SSE_INTRINSICS_)
|
||||
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
|
||||
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(__GNUC__)
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(__GNUC__) || defined(_ARM64_DISTINCT_NEON_TYPES))
|
||||
inline operator int32x4_t() const noexcept { return vreinterpretq_s32_f32(v); }
|
||||
inline operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); }
|
||||
#endif
|
||||
|
@ -305,7 +305,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt2A(const uint32_t* pSource) noexcept
|
||||
V.vector4_u32[3] = 0;
|
||||
return V;
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
uint32x2_t x = vld1_u32_ex(pSource, 64);
|
||||
#else
|
||||
uint32x2_t x = vld1_u32(pSource);
|
||||
@ -352,7 +352,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat2A(const XMFLOAT2A* pSource) noexcept
|
||||
V.vector4_f32[3] = 0.f;
|
||||
return V;
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
float32x2_t x = vld1_f32_ex(reinterpret_cast<const float*>(pSource), 64);
|
||||
#else
|
||||
float32x2_t x = vld1_f32(reinterpret_cast<const float*>(pSource));
|
||||
@ -465,7 +465,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt3A(const uint32_t* pSource) noexcept
|
||||
return V;
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
// Reads an extra integer which is zero'd
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
uint32x4_t V = vld1q_u32_ex(pSource, 128);
|
||||
#else
|
||||
uint32x4_t V = vld1q_u32(pSource);
|
||||
@ -525,7 +525,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat3A(const XMFLOAT3A* pSource) noexcept
|
||||
return V;
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
// Reads an extra float which is zero'd
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
float32x4_t V = vld1q_f32_ex(reinterpret_cast<const float*>(pSource), 128);
|
||||
#else
|
||||
float32x4_t V = vld1q_f32(reinterpret_cast<const float*>(pSource));
|
||||
@ -639,7 +639,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt4A(const uint32_t* pSource) noexcept
|
||||
V.vector4_u32[3] = pSource[3];
|
||||
return V;
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
return vld1q_u32_ex(pSource, 128);
|
||||
#else
|
||||
return vreinterpretq_f32_u32(vld1q_u32(pSource));
|
||||
@ -683,7 +683,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat4A(const XMFLOAT4A* pSource) noexcept
|
||||
V.vector4_f32[3] = pSource->w;
|
||||
return V;
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
return vld1q_f32_ex(reinterpret_cast<const float*>(pSource), 128);
|
||||
#else
|
||||
return vld1q_f32(reinterpret_cast<const float*>(pSource));
|
||||
@ -919,7 +919,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A(const XMFLOAT4X3A* pSource) noexcept
|
||||
return M;
|
||||
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
float32x4_t v0 = vld1q_f32_ex(&pSource->m[0][0], 128);
|
||||
float32x4_t v1 = vld1q_f32_ex(&pSource->m[1][1], 128);
|
||||
float32x4_t v2 = vld1q_f32_ex(&pSource->m[2][2], 128);
|
||||
@ -1081,7 +1081,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat3x4A(const XMFLOAT3X4A* pSource) noexcept
|
||||
return M;
|
||||
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
float32x2x4_t vTemp0 = vld4_f32_ex(&pSource->_11, 128);
|
||||
float32x4_t vTemp1 = vld1q_f32_ex(&pSource->_31, 128);
|
||||
#else
|
||||
@ -1212,7 +1212,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x4A(const XMFLOAT4X4A* pSource) noexcept
|
||||
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
XMMATRIX M;
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
M.r[0] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_11), 128);
|
||||
M.r[1] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_21), 128);
|
||||
M.r[2] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_31), 128);
|
||||
@ -1309,7 +1309,7 @@ inline void XM_CALLCONV XMStoreInt2A
|
||||
pDestination[1] = V.vector4_u32[1];
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V));
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
vst1_u32_ex(pDestination, VL, 64);
|
||||
#else
|
||||
vst1_u32(pDestination, VL);
|
||||
@ -1354,7 +1354,7 @@ inline void XM_CALLCONV XMStoreFloat2A
|
||||
pDestination->y = V.vector4_f32[1];
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
float32x2_t VL = vget_low_f32(V);
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64);
|
||||
#else
|
||||
vst1_f32(reinterpret_cast<float*>(pDestination), VL);
|
||||
@ -1473,7 +1473,7 @@ inline void XM_CALLCONV XMStoreInt3A
|
||||
pDestination[2] = V.vector4_u32[2];
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V));
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
vst1_u32_ex(pDestination, VL, 64);
|
||||
#else
|
||||
vst1_u32(pDestination, VL);
|
||||
@ -1530,7 +1530,7 @@ inline void XM_CALLCONV XMStoreFloat3A
|
||||
pDestination->z = V.vector4_f32[2];
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
float32x2_t VL = vget_low_f32(V);
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64);
|
||||
#else
|
||||
vst1_f32(reinterpret_cast<float*>(pDestination), VL);
|
||||
@ -1660,7 +1660,7 @@ inline void XM_CALLCONV XMStoreInt4A
|
||||
pDestination[2] = V.vector4_u32[2];
|
||||
pDestination[3] = V.vector4_u32[3];
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
vst1q_u32_ex(pDestination, V, 128);
|
||||
#else
|
||||
vst1q_u32(pDestination, vreinterpretq_u32_f32(V));
|
||||
@ -1707,7 +1707,7 @@ inline void XM_CALLCONV XMStoreFloat4A
|
||||
pDestination->z = V.vector4_f32[2];
|
||||
pDestination->w = V.vector4_f32[3];
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
vst1q_f32_ex(reinterpret_cast<float*>(pDestination), V, 128);
|
||||
#else
|
||||
vst1q_f32(reinterpret_cast<float*>(pDestination), V);
|
||||
@ -1917,7 +1917,7 @@ inline void XM_CALLCONV XMStoreFloat4x3A
|
||||
pDestination->m[3][2] = M.r[3].vector4_f32[2];
|
||||
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1);
|
||||
float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1);
|
||||
vst1q_f32_ex(&pDestination->m[0][0], T2, 128);
|
||||
@ -2061,7 +2061,7 @@ inline void XM_CALLCONV XMStoreFloat3x4A
|
||||
float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]);
|
||||
float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]);
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
vst1q_f32_ex(&pDestination->m[0][0], T0.val[0], 128);
|
||||
vst1q_f32_ex(&pDestination->m[1][0], T0.val[1], 128);
|
||||
vst1q_f32_ex(&pDestination->m[2][0], T1.val[0], 128);
|
||||
@ -2170,7 +2170,7 @@ inline void XM_CALLCONV XMStoreFloat4x4A
|
||||
pDestination->m[3][3] = M.r[3].vector4_f32[3];
|
||||
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_11), M.r[0], 128);
|
||||
vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_21), M.r[1], 128);
|
||||
vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_31), M.r[2], 128);
|
||||
|
@ -1734,7 +1734,7 @@ inline XMVECTOR XM_CALLCONV XMVectorNearEqual
|
||||
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
float32x4_t vDelta = vsubq_f32(V1, V2);
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
return vacleq_f32(vDelta, Epsilon);
|
||||
#else
|
||||
return vreinterpretq_f32_u32(vcleq_f32(vabsq_f32(vDelta), Epsilon));
|
||||
@ -6328,7 +6328,7 @@ inline bool XM_CALLCONV XMVector2NearEqual
|
||||
(dy <= Epsilon.vector4_f32[1]));
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
float32x2_t vDelta = vsub_f32(vget_low_f32(V1), vget_low_f32(V2));
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
uint32x2_t vTemp = vacle_f32(vDelta, vget_low_u32(Epsilon));
|
||||
#else
|
||||
uint32x2_t vTemp = vcle_f32(vabs_f32(vDelta), vget_low_f32(Epsilon));
|
||||
@ -9057,7 +9057,7 @@ inline bool XM_CALLCONV XMVector3NearEqual
|
||||
(dz <= Epsilon.vector4_f32[2])) != 0);
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
float32x4_t vDelta = vsubq_f32(V1, V2);
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
uint32x4_t vResult = vacleq_f32(vDelta, Epsilon);
|
||||
#else
|
||||
uint32x4_t vResult = vcleq_f32(vabsq_f32(vDelta), Epsilon);
|
||||
@ -12924,7 +12924,7 @@ inline bool XM_CALLCONV XMVector4NearEqual
|
||||
(dw <= Epsilon.vector4_f32[3])) != 0);
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
float32x4_t vDelta = vsubq_f32(V1, V2);
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
|
||||
uint32x4_t vResult = vacleq_f32(vDelta, Epsilon);
|
||||
#else
|
||||
uint32x4_t vResult = vcleq_f32(vabsq_f32(vDelta), Epsilon);
|
||||
|
Loading…
Reference in New Issue
Block a user