mirror of https://github.com/microsoft/DirectXMath synced 2024-11-09 14:10:09 +00:00

DirectXMath 3.07

Chuck Walbourn 2016-05-23 14:33:29 -07:00
parent aee6e900f0
commit 51802de35b
9 changed files with 778 additions and 1299 deletions

View File

@ -281,7 +281,7 @@ struct BoundingFrustum
PlaneIntersectionType XM_CALLCONV Intersects( _In_ FXMVECTOR Plane ) const;
// Plane-Frustum test
bool XM_CALLCONV Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;
bool XM_CALLCONV Intersects( _In_ FXMVECTOR rayOrigin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;
// Ray-Frustum test
ContainmentType XM_CALLCONV ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
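For reference, the renamed overload is the ray-frustum test; a minimal usage sketch, not part of the commit (projMatrix and the literals are illustrative):

// Test a ray against a view frustum; Dist receives the entry distance on a hit.
BoundingFrustum frustum( projMatrix );
XMVECTOR rayOrigin = XMVectorSet( 0.0f, 0.0f, -5.0f, 0.0f );
XMVECTOR rayDir = XMVector3Normalize( XMVectorSet( 0.0f, 0.0f, 1.0f, 0.0f ) ); // must be unit length
float dist;
if ( frustum.Intersects( rayOrigin, rayDir, dist ) )
{
    // hit point is rayOrigin + rayDir * dist
}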

View File

@ -1290,13 +1290,13 @@ inline ContainmentType XM_CALLCONV BoundingBox::Contains( FXMVECTOR V0, FXMVECTO
XMVECTOR vCenter = XMLoadFloat3( &Center );
XMVECTOR vExtents = XMLoadFloat3( &Extents );
XMVECTOR d = XMVector3LengthSq( V0 - vCenter );
XMVECTOR d = XMVectorAbs( V0 - vCenter );
XMVECTOR Inside = XMVectorLessOrEqual( d, vExtents );
d = XMVector3LengthSq( V1 - vCenter );
d = XMVectorAbs( V1 - vCenter );
Inside = XMVectorAndInt( Inside, XMVectorLessOrEqual( d, vExtents ) );
d = XMVector3LengthSq( V2 - vCenter );
d = XMVectorAbs( V2 - vCenter );
Inside = XMVectorAndInt( Inside, XMVectorLessOrEqual( d, vExtents ) );
return ( XMVector3EqualInt( Inside, XMVectorTrueInt() ) ) ? CONTAINS : INTERSECTS;
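The edits above fix the point-in-box predicate: the old code compared a squared length against the per-axis extents, while containment needs the per-axis absolute offset. A scalar sketch of the corrected test, illustrative only:

// A point p is inside an axis-aligned box iff |p - center| <= extents on every axis.
inline bool PointInsideAabb( const XMFLOAT3& p, const XMFLOAT3& c, const XMFLOAT3& e )
{
    return fabsf( p.x - c.x ) <= e.x
        && fabsf( p.y - c.y ) <= e.y
        && fabsf( p.z - c.z ) <= e.z;
}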
@ -1406,7 +1406,7 @@ inline ContainmentType BoundingBox::Contains( const BoundingOrientedBox& box ) c
for( size_t i=0; i < BoundingOrientedBox::CORNER_COUNT; ++i )
{
XMVECTOR C = XMVector3Rotate( oExtents * g_BoxOffset[i], oOrientation ) + oCenter;
XMVECTOR d = XMVector3LengthSq( C );
XMVECTOR d = XMVectorAbs(C);
Inside = XMVectorAndInt( Inside, XMVectorLessOrEqual( d, vExtents ) );
}
@ -1434,7 +1434,7 @@ inline ContainmentType BoundingBox::Contains( const BoundingFrustum& fr ) const
for( size_t i=0; i < BoundingFrustum::CORNER_COUNT; ++i )
{
XMVECTOR Point = XMLoadFloat3( &Corners[i] );
XMVECTOR d = XMVector3LengthSq( Point - vCenter );
XMVECTOR d = XMVectorAbs( Point - vCenter );
Inside = XMVectorAndInt( Inside, XMVectorLessOrEqual( d, vExtents ) );
}
@ -2547,11 +2547,11 @@ inline bool XM_CALLCONV BoundingOrientedBox::Intersects( FXMVECTOR Origin, FXMVE
{
assert( DirectX::Internal::XMVector3IsUnit( Direction ) );
static const XMVECTORI32 SelectY =
static const XMVECTORU32 SelectY =
{
XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0
};
static const XMVECTORI32 SelectZ =
static const XMVECTORU32 SelectZ =
{
XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0
};
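SelectY and SelectZ are control masks for XMVectorSelect, so the unsigned XMVECTORU32 initializer is the better fit for the all-ones XM_SELECT_1 lanes. Usage sketch (a and b are illustrative vectors):

// XMVectorSelect takes a lane from the first operand where the control bit is
// XM_SELECT_0 and from the second where it is XM_SELECT_1, so SelectY
// produces { a.x, b.y, a.z, a.w }.
XMVECTOR mixed = XMVectorSelect( a, b, SelectY );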
@ -3365,11 +3365,11 @@ inline bool BoundingFrustum::Intersects( const BoundingBox& box ) const
_Use_decl_annotations_
inline bool BoundingFrustum::Intersects( const BoundingOrientedBox& box ) const
{
static const XMVECTORI32 SelectY =
static const XMVECTORU32 SelectY =
{
XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0
};
static const XMVECTORI32 SelectZ =
static const XMVECTORU32 SelectZ =
{
XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0
};
@ -4426,23 +4426,23 @@ inline bool XM_CALLCONV Intersects( FXMVECTOR Origin, FXMVECTOR Direction, FXMVE
_Use_decl_annotations_
inline bool XM_CALLCONV Intersects( FXMVECTOR A0, FXMVECTOR A1, FXMVECTOR A2, GXMVECTOR B0, HXMVECTOR B1, HXMVECTOR B2 )
{
static const XMVECTORI32 SelectY =
static const XMVECTORU32 SelectY =
{
XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0
};
static const XMVECTORI32 SelectZ =
static const XMVECTORU32 SelectZ =
{
XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0
};
static const XMVECTORI32 Select0111 =
static const XMVECTORU32 Select0111 =
{
XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_1
};
static const XMVECTORI32 Select1011 =
static const XMVECTORU32 Select1011 =
{
XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_1
};
static const XMVECTORI32 Select1101 =
static const XMVECTORU32 Select1101 =
{
XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1
};

View File

@ -17,20 +17,10 @@
#error DirectX Math requires C++
#endif
#define DIRECTX_MATH_VERSION 306
#if !defined(_XM_BIGENDIAN_) && !defined(_XM_LITTLEENDIAN_)
#if defined(_M_X64) || defined(_M_IX86) || defined(_M_ARM)
#define _XM_LITTLEENDIAN_
#elif defined(_M_PPCBE)
#define _XM_BIGENDIAN_
#else
#error DirectX Math does not support this target
#endif
#endif // !_XM_BIGENDIAN_ && !_XM_LITTLEENDIAN_
#define DIRECTX_MATH_VERSION 307
#if defined(_MSC_VER) && !defined(_M_ARM) && (!_MANAGED) && (!_M_CEE) && (!defined(_M_IX86_FP) || (_M_IX86_FP > 1)) && !defined(_XM_NO_INTRINSICS_) && !defined(_XM_VECTORCALL_)
#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64) && (!_MANAGED) && (!_M_CEE) && (!defined(_M_IX86_FP) || (_M_IX86_FP > 1)) && !defined(_XM_NO_INTRINSICS_) && !defined(_XM_VECTORCALL_)
#if ((_MSC_FULL_VER >= 170065501) && (_MSC_VER < 1800)) || (_MSC_FULL_VER >= 180020418)
#define _XM_VECTORCALL_ 1
#endif
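When this detection leaves _XM_VECTORCALL_ defined as 1, the header maps XM_CALLCONV onto __vectorcall; an abbreviated sketch of that downstream definition:

#if _XM_VECTORCALL_
#define XM_CALLCONV __vectorcall
#else
#define XM_CALLCONV __fastcall
#endif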
@ -44,21 +34,19 @@
#if !defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_) && !defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#if !defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#if defined(_M_IX86) || defined(_M_X64)
#define _XM_SSE_INTRINSICS_
#elif defined(_M_PPCBE)
#define _XM_VMX128_INTRINSICS_
#elif defined(_M_ARM)
#elif defined(_M_ARM) || defined(_M_ARM64)
#define _XM_ARM_NEON_INTRINSICS_
#elif !defined(_XM_NO_INTRINSICS_)
#error DirectX Math does not support this target
#endif
#endif // !_XM_ARM_NEON_INTRINSICS_ && !_XM_SSE_INTRINSICS_ && !_XM_VMX128_INTRINSICS_ && !_XM_NO_INTRINSICS_
#endif // !_XM_ARM_NEON_INTRINSICS_ && !_XM_SSE_INTRINSICS_ && !_XM_NO_INTRINSICS_
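Callers who want the portable scalar path on any architecture can opt out before including the header; a minimal sketch:

// Force the no-intrinsics (scalar) implementation; must precede the include.
#define _XM_NO_INTRINSICS_
#include <DirectXMath.h>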
#pragma warning(push)
#pragma warning(disable:4514 4820 4985)
#include <cmath>
#include <math.h>
#include <float.h>
#include <malloc.h>
#pragma warning(pop)
@ -69,23 +57,33 @@
#include <xmmintrin.h>
#include <emmintrin.h>
#endif
#elif defined(_XM_VMX128_INTRINSICS_)
#error This version of DirectX Math does not support Xbox 360
#elif defined(_XM_ARM_NEON_INTRINSICS_)
#ifndef _XM_NO_INTRINSICS_
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#ifdef _M_ARM64
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif
#endif
#include <sal.h>
#include <assert.h>
#ifndef _XM_NO_ROUNDF_
#ifdef _MSC_VER
#include <yvals.h>
#if defined(_CPPLIB_VER) && ( _CPPLIB_VER < 610 )
#define _XM_NO_ROUNDF_
#endif
#endif
#endif
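This check disables use of C99 roundf on CRTs older than _CPPLIB_VER 610. A hypothetical sketch of the kind of substitute the flag implies; the helper name is invented and the library's real fallback may differ:

// Hypothetical roundf replacement: round half away from zero, matching roundf semantics.
inline float RoundFallback( float x )
{
    float i = floorf( fabsf( x ) + 0.5f );
    return ( x < 0.0f ) ? -i : i;
}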
#pragma warning(push)
#pragma warning(disable : 4005 4668)
@ -236,13 +234,8 @@ inline bool XMComparisonAnyOutOfBounds(uint32_t CR) { return (((CR) & XM_CRMASK_
#pragma prefast(push)
#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
#ifdef _XM_BIGENDIAN_
#pragma bitfield_order(push)
#pragma bitfield_order(lsb_to_msb)
#endif
//------------------------------------------------------------------------------
#if defined(_XM_NO_INTRINSICS_) && !defined(_M_PPCBE)
#if defined(_XM_NO_INTRINSICS_)
// The __vector4 structure is an intrinsic on Xbox but must be separately defined
// for x86/x64
struct __vector4
@ -256,7 +249,7 @@ struct __vector4
#endif // _XM_NO_INTRINSICS_
//------------------------------------------------------------------------------
#if (defined (_M_IX86) || defined(_M_X64) || defined(_M_ARM)) && defined(_XM_NO_INTRINSICS_)
#ifdef _XM_NO_INTRINSICS_
typedef uint32_t __vector4i[4];
#else
typedef __declspec(align(16)) uint32_t __vector4i[4];
@ -268,38 +261,34 @@ typedef __declspec(align(16)) uint32_t __vector4i[4];
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
typedef __m128 XMVECTOR;
#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
typedef __n128 XMVECTOR;
typedef float32x4_t XMVECTOR;
#else
typedef __vector4 XMVECTOR;
#endif
// Fix-up for (1st-3rd) XMVECTOR parameters that are pass-in-register for x86, ARM, Xbox 360, and vector call; by reference otherwise
#if ( defined(_M_IX86) || defined(_M_ARM) || defined(_XM_VMX128_INTRINSICS_) || _XM_VECTORCALL_ ) && !defined(_XM_NO_INTRINSICS_)
// Fix-up for (1st-3rd) XMVECTOR parameters that are pass-in-register for x86, ARM, ARM64, and vector call; by reference otherwise
#if ( defined(_M_IX86) || defined(_M_ARM) || defined(_M_ARM64) || _XM_VECTORCALL_ ) && !defined(_XM_NO_INTRINSICS_)
typedef const XMVECTOR FXMVECTOR;
#else
typedef const XMVECTOR& FXMVECTOR;
#endif
// Fix-up for (4th) XMVECTOR parameter to pass in-register for ARM, Xbox 360, and x64 vector call; by reference otherwise
#if ( defined(_M_ARM) || defined(_XM_VMX128_INTRINSICS_) || (_XM_VECTORCALL_ && !defined(_M_IX86) ) ) && !defined(_XM_NO_INTRINSICS_)
// Fix-up for (4th) XMVECTOR parameter to pass in-register for ARM, ARM64, and x64 vector call; by reference otherwise
#if ( defined(_M_ARM) || defined(_M_ARM64) || (_XM_VECTORCALL_ && !defined(_M_IX86) ) ) && !defined(_XM_NO_INTRINSICS_)
typedef const XMVECTOR GXMVECTOR;
#else
typedef const XMVECTOR& GXMVECTOR;
#endif
// Fix-up for (5th & 6th) XMVECTOR parameter to pass in-register for Xbox 360 and vector call; by reference otherwise
#if ( defined(_XM_VMX128_INTRINSICS_) || _XM_VECTORCALL_ ) && !defined(_XM_NO_INTRINSICS_)
// Fix-up for (5th & 6th) XMVECTOR parameter to pass in-register for ARM64 and vector call; by reference otherwise
#if ( defined(_M_ARM64) || _XM_VECTORCALL_ ) && !defined(_XM_NO_INTRINSICS_)
typedef const XMVECTOR HXMVECTOR;
#else
typedef const XMVECTOR& HXMVECTOR;
#endif
// Fix-up for (7th+) XMVECTOR parameters to pass in-register for Xbox 360; by reference otherwise
#if defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
typedef const XMVECTOR CXMVECTOR;
#else
// Fix-up for (7th+) XMVECTOR parameters to pass by reference
typedef const XMVECTOR& CXMVECTOR;
#endif
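Taken together, these typedefs encode the calling convention: the first three XMVECTOR parameters are declared FXMVECTOR, the fourth GXMVECTOR, the fifth and sixth HXMVECTOR, and any later ones CXMVECTOR. A signature sketch (MyTransform is a hypothetical function):

XMVECTOR XM_CALLCONV MyTransform( FXMVECTOR v1, FXMVECTOR v2, FXMVECTOR v3,
                                  GXMVECTOR v4, HXMVECTOR v5, HXMVECTOR v6,
                                  CXMVECTOR v7 );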
//------------------------------------------------------------------------------
// Conversion types for constants
@ -391,21 +380,17 @@ XMVECTOR XM_CALLCONV operator/ (FXMVECTOR V, float S);
struct XMMATRIX;
// Fix-up for (1st) XMMATRIX parameter to pass in-register on Xbox 360 and vector call; by reference otherwise
#if ( defined(_XM_VMX128_INTRINSICS )|| _XM_VECTORCALL_ ) && !defined(_XM_NO_INTRINSICS_)
// Fix-up for (1st) XMMATRIX parameter to pass in-register for ARM64 and vector call; by reference otherwise
#if ( defined(_M_ARM64) || _XM_VECTORCALL_ ) && !defined(_XM_NO_INTRINSICS_)
typedef const XMMATRIX FXMMATRIX;
#else
typedef const XMMATRIX& FXMMATRIX;
#endif
// Fix-up for (2nd+) XMMATRIX parameters to pass in-register on Xbox 360, by reference otherwise
#if defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
typedef const XMMATRIX CXMMATRIX;
#else
// Fix-up for (2nd+) XMMATRIX parameters to pass by reference
typedef const XMMATRIX& CXMMATRIX;
#endif
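The matrix types follow the same pattern: the first XMMATRIX parameter is FXMMATRIX, subsequent ones CXMMATRIX. Sketch (MyConcat is hypothetical):

XMMATRIX XM_CALLCONV MyConcat( FXMMATRIX m1, CXMMATRIX m2 );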
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM)) && defined(_XM_NO_INTRINSICS_)
#ifdef _XM_NO_INTRINSICS_
struct XMMATRIX
#else
__declspec(align(16)) struct XMMATRIX
@ -745,11 +730,6 @@ __declspec(align(16)) struct XMFLOAT4X4A : public XMFLOAT4X4
////////////////////////////////////////////////////////////////////////////////
#ifdef _XM_BIGENDIAN_
#pragma bitfield_order(pop)
#endif
#pragma prefast(pop)
#pragma warning(pop)
@ -759,16 +739,10 @@ __declspec(align(16)) struct XMFLOAT4X4A : public XMFLOAT4X4
*
****************************************************************************/
#if !defined(_XM_NO_INTRINSICS_) && defined(_XM_VMX128_INTRINSICS_)
#else
XMVECTOR XM_CALLCONV XMConvertVectorIntToFloat(FXMVECTOR VInt, uint32_t DivExponent);
XMVECTOR XM_CALLCONV XMConvertVectorFloatToInt(FXMVECTOR VFloat, uint32_t MulExponent);
XMVECTOR XM_CALLCONV XMConvertVectorUIntToFloat(FXMVECTOR VUInt, uint32_t DivExponent);
XMVECTOR XM_CALLCONV XMConvertVectorFloatToUInt(FXMVECTOR VFloat, uint32_t MulExponent);
#endif
#if !defined(_XM_NO_INTRINSICS_) && defined(_XM_VMX128_INTRINSICS_)
#else
#if defined(__XNAMATH_H__) && defined(XMVectorSetBinaryConstant)
#undef XMVectorSetBinaryConstant
@ -779,7 +753,6 @@ XMVECTOR XM_CALLCONV XMConvertVectorFloatToUInt(FXMVECTOR VFloat, uint32_
XMVECTOR XM_CALLCONV XMVectorSetBinaryConstant(uint32_t C0, uint32_t C1, uint32_t C2, uint32_t C3);
XMVECTOR XM_CALLCONV XMVectorSplatConstant(int32_t IntConstant, uint32_t DivExponent);
XMVECTOR XM_CALLCONV XMVectorSplatConstantInt(int32_t IntConstant);
#endif
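These conversion routines treat the exponent argument as a power-of-two scale. Usage sketch (fixedPoint is an illustrative input vector):

// Convert 16.16 fixed-point lanes to float: each lane is divided by 2^16.
XMVECTOR asFloat = XMConvertVectorIntToFloat( fixedPoint, 16 );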
/****************************************************************************
*
@ -1579,8 +1552,6 @@ template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t Swizz
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
return XM_PERMUTE_PS( V, _MM_SHUFFLE( SwizzleW, SwizzleZ, SwizzleY, SwizzleX ) );
#elif defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
return __vpermwi(V, ((SwizzleX & 3) << 6) | ((SwizzleY & 3) << 4) | ((SwizzleZ & 3) << 2) | (SwizzleW & 3) );
#else
return XMVectorSwizzle( V, SwizzleX, SwizzleY, SwizzleZ, SwizzleW );
@ -1601,10 +1572,10 @@ template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3,3,3,3>(FXMVECT
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,0,3,2>(FXMVECTOR V) { return vrev64q_f32(V); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,0,1>(FXMVECTOR V) { __n64 vt = vget_low_f32(V); return vcombine_f32( vt, vt ); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2,3,2,3>(FXMVECTOR V) { __n64 vt = vget_high_f32(V); return vcombine_f32( vt, vt ); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,0,1,0>(FXMVECTOR V) { __n64 vt = vrev64_f32( vget_low_f32(V) ); return vcombine_f32( vt, vt ); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3,2,3,2>(FXMVECTOR V) { __n64 vt = vrev64_f32( vget_high_f32(V) ); return vcombine_f32( vt, vt ); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,0,1>(FXMVECTOR V) { float32x2_t vt = vget_low_f32(V); return vcombine_f32( vt, vt ); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2,3,2,3>(FXMVECTOR V) { float32x2_t vt = vget_high_f32(V); return vcombine_f32( vt, vt ); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,0,1,0>(FXMVECTOR V) { float32x2_t vt = vrev64_f32( vget_low_f32(V) ); return vcombine_f32( vt, vt ); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3,2,3,2>(FXMVECTOR V) { float32x2_t vt = vrev64_f32( vget_high_f32(V) ); return vcombine_f32( vt, vt ); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,3,2>(FXMVECTOR V) { return vcombine_f32( vget_low_f32(V), vrev64_f32( vget_high_f32(V) ) ); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,0,2,3>(FXMVECTOR V) { return vcombine_f32( vrev64_f32( vget_low_f32(V) ), vget_high_f32(V) ); }
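The specializations are compile-time fast paths; call sites are unchanged. Usage sketch:

// Select lanes { y, x, w, z } of v; on ARM-NEON this resolves to vrev64q_f32.
XMVECTOR r = XMVectorSwizzle<1, 0, 3, 2>( v );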
@ -1633,40 +1604,28 @@ template<uint32_t Elements>
inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2)
{
static_assert( Elements < 4, "Elements template parameter out of range" );
#if defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#else
return XMVectorPermute<Elements, (Elements + 1), (Elements + 2), (Elements + 3)>(V1, V2);
#endif
}
template<uint32_t Elements>
inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V)
{
static_assert( Elements < 4, "Elements template parameter out of range" );
#if defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#else
return XMVectorSwizzle<Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3>(V);
#endif
}
template<uint32_t Elements>
inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V)
{
static_assert( Elements < 4, "Elements template parameter out of range" );
#if defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#else
return XMVectorSwizzle<(4 - Elements) & 3, (5 - Elements) & 3, (6 - Elements) & 3, (7 - Elements) & 3>(V);
#endif
}
template<uint32_t VSLeftRotateElements, uint32_t Select0, uint32_t Select1, uint32_t Select2, uint32_t Select3>
inline XMVECTOR XM_CALLCONV XMVectorInsert(FXMVECTOR VD, FXMVECTOR VS)
{
#if defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#else
XMVECTOR Control = XMVectorSelectControl(Select0&1, Select1&1, Select2&1, Select3&1);
return XMVectorSelect( VD, XMVectorRotateLeft<VSLeftRotateElements>(VS), Control );
#endif
}
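With the VMX128 branches removed, these templates always reduce to the permute/swizzle forms shown. Usage sketch:

// Rotate { x, y, z, w } left by one lane, giving { y, z, w, x }.
XMVECTOR r = XMVectorRotateLeft<1>( v );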
/****************************************************************************
@ -1711,13 +1670,13 @@ XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR0 = {-1.0f,0.0f, 0.0f, 0.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR1 = {0.0f,-1.0f, 0.0f, 0.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR2 = {0.0f, 0.0f,-1.0f, 0.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR3 = {0.0f, 0.0f, 0.0f,-1.0f};
XMGLOBALCONST XMVECTORI32 g_XMNegativeZero = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
XMGLOBALCONST XMVECTORI32 g_XMNegate3 = {0x80000000, 0x80000000, 0x80000000, 0x00000000};
XMGLOBALCONST XMVECTORI32 g_XMMask3 = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000};
XMGLOBALCONST XMVECTORI32 g_XMMaskX = {0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORI32 g_XMMaskY = {0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORI32 g_XMMaskZ = {0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000};
XMGLOBALCONST XMVECTORI32 g_XMMaskW = {0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF};
XMGLOBALCONST XMVECTORU32 g_XMNegativeZero = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
XMGLOBALCONST XMVECTORU32 g_XMNegate3 = {0x80000000, 0x80000000, 0x80000000, 0x00000000};
XMGLOBALCONST XMVECTORU32 g_XMMask3 = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000};
XMGLOBALCONST XMVECTORU32 g_XMMaskX = {0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORU32 g_XMMaskY = {0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORU32 g_XMMaskZ = {0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000};
XMGLOBALCONST XMVECTORU32 g_XMMaskW = {0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF};
XMGLOBALCONST XMVECTORF32 g_XMOne = { 1.0f, 1.0f, 1.0f, 1.0f};
XMGLOBALCONST XMVECTORF32 g_XMOne3 = { 1.0f, 1.0f, 1.0f, 0.0f};
XMGLOBALCONST XMVECTORF32 g_XMZero = { 0.0f, 0.0f, 0.0f, 0.0f};
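The retyped globals are bit patterns rather than floats, which is what XMVECTORU32 expresses; they are consumed through the integer-logic helpers. Sketch:

// Zero the w lane of v by masking with the xyz mask.
XMVECTOR xyz = XMVectorAndInt( v, g_XMMask3 );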
@ -1741,20 +1700,20 @@ XMGLOBALCONST XMVECTORI32 g_XMQNaNTest = {0x007FFFFF, 0x007FFFFF, 0x007
XMGLOBALCONST XMVECTORI32 g_XMAbsMask = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
XMGLOBALCONST XMVECTORI32 g_XMFltMin = {0x00800000, 0x00800000, 0x00800000, 0x00800000};
XMGLOBALCONST XMVECTORI32 g_XMFltMax = {0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF};
XMGLOBALCONST XMVECTORI32 g_XMNegOneMask = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
XMGLOBALCONST XMVECTORI32 g_XMMaskA8R8G8B8 = {0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000};
XMGLOBALCONST XMVECTORI32 g_XMFlipA8R8G8B8 = {0x00000000, 0x00000000, 0x00000000, 0x80000000};
XMGLOBALCONST XMVECTORU32 g_XMNegOneMask = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
XMGLOBALCONST XMVECTORU32 g_XMMaskA8R8G8B8 = {0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000};
XMGLOBALCONST XMVECTORU32 g_XMFlipA8R8G8B8 = {0x00000000, 0x00000000, 0x00000000, 0x80000000};
XMGLOBALCONST XMVECTORF32 g_XMFixAA8R8G8B8 = {0.0f,0.0f,0.0f,(float)(0x80000000U)};
XMGLOBALCONST XMVECTORF32 g_XMNormalizeA8R8G8B8 = {1.0f/(255.0f*(float)(0x10000)),1.0f/(255.0f*(float)(0x100)),1.0f/255.0f,1.0f/(255.0f*(float)(0x1000000))};
XMGLOBALCONST XMVECTORI32 g_XMMaskA2B10G10R10 = {0x000003FF, 0x000FFC00, 0x3FF00000, 0xC0000000};
XMGLOBALCONST XMVECTORI32 g_XMFlipA2B10G10R10 = {0x00000200, 0x00080000, 0x20000000, 0x80000000};
XMGLOBALCONST XMVECTORU32 g_XMMaskA2B10G10R10 = {0x000003FF, 0x000FFC00, 0x3FF00000, 0xC0000000};
XMGLOBALCONST XMVECTORU32 g_XMFlipA2B10G10R10 = {0x00000200, 0x00080000, 0x20000000, 0x80000000};
XMGLOBALCONST XMVECTORF32 g_XMFixAA2B10G10R10 = {-512.0f,-512.0f*(float)(0x400),-512.0f*(float)(0x100000),(float)(0x80000000U)};
XMGLOBALCONST XMVECTORF32 g_XMNormalizeA2B10G10R10 = {1.0f/511.0f,1.0f/(511.0f*(float)(0x400)),1.0f/(511.0f*(float)(0x100000)),1.0f/(3.0f*(float)(0x40000000))};
XMGLOBALCONST XMVECTORI32 g_XMMaskX16Y16 = {0x0000FFFF, 0xFFFF0000, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORU32 g_XMMaskX16Y16 = {0x0000FFFF, 0xFFFF0000, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORI32 g_XMFlipX16Y16 = {0x00008000, 0x00000000, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORF32 g_XMFixX16Y16 = {-32768.0f,0.0f,0.0f,0.0f};
XMGLOBALCONST XMVECTORF32 g_XMNormalizeX16Y16 = {1.0f/32767.0f,1.0f/(32767.0f*65536.0f),0.0f,0.0f};
XMGLOBALCONST XMVECTORI32 g_XMMaskX16Y16Z16W16 = {0x0000FFFF, 0x0000FFFF, 0xFFFF0000, 0xFFFF0000};
XMGLOBALCONST XMVECTORU32 g_XMMaskX16Y16Z16W16 = {0x0000FFFF, 0x0000FFFF, 0xFFFF0000, 0xFFFF0000};
XMGLOBALCONST XMVECTORI32 g_XMFlipX16Y16Z16W16 = {0x00008000, 0x00008000, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORF32 g_XMFixX16Y16Z16W16 = {-32768.0f,-32768.0f,0.0f,0.0f};
XMGLOBALCONST XMVECTORF32 g_XMNormalizeX16Y16Z16W16 = {1.0f/32767.0f,1.0f/32767.0f,1.0f/(32767.0f*65536.0f),1.0f/(32767.0f*65536.0f)};
@ -1764,27 +1723,27 @@ XMGLOBALCONST XMVECTORF32 g_XMNegateX = {-1.0f, 1.0f, 1.0f, 1.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegateY = { 1.0f,-1.0f, 1.0f, 1.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegateZ = { 1.0f, 1.0f,-1.0f, 1.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegateW = { 1.0f, 1.0f, 1.0f,-1.0f};
XMGLOBALCONST XMVECTORI32 g_XMSelect0101 = {XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1};
XMGLOBALCONST XMVECTORI32 g_XMSelect1010 = {XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0};
XMGLOBALCONST XMVECTORU32 g_XMSelect0101 = {XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1};
XMGLOBALCONST XMVECTORU32 g_XMSelect1010 = {XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0};
XMGLOBALCONST XMVECTORI32 g_XMOneHalfMinusEpsilon = { 0x3EFFFFFD, 0x3EFFFFFD, 0x3EFFFFFD, 0x3EFFFFFD};
XMGLOBALCONST XMVECTORI32 g_XMSelect1000 = {XM_SELECT_1, XM_SELECT_0, XM_SELECT_0, XM_SELECT_0};
XMGLOBALCONST XMVECTORI32 g_XMSelect1100 = {XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0};
XMGLOBALCONST XMVECTORI32 g_XMSelect1110 = {XM_SELECT_1, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0};
XMGLOBALCONST XMVECTORI32 g_XMSelect1011 = { XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_1 };
XMGLOBALCONST XMVECTORU32 g_XMSelect1000 = {XM_SELECT_1, XM_SELECT_0, XM_SELECT_0, XM_SELECT_0};
XMGLOBALCONST XMVECTORU32 g_XMSelect1100 = {XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0};
XMGLOBALCONST XMVECTORU32 g_XMSelect1110 = {XM_SELECT_1, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0};
XMGLOBALCONST XMVECTORU32 g_XMSelect1011 = { XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_1 };
XMGLOBALCONST XMVECTORF32 g_XMFixupY16 = {1.0f,1.0f/65536.0f,0.0f,0.0f};
XMGLOBALCONST XMVECTORF32 g_XMFixupY16W16 = {1.0f,1.0f,1.0f/65536.0f,1.0f/65536.0f};
XMGLOBALCONST XMVECTORI32 g_XMFlipY = {0,0x80000000,0,0};
XMGLOBALCONST XMVECTORI32 g_XMFlipZ = {0,0,0x80000000,0};
XMGLOBALCONST XMVECTORI32 g_XMFlipW = {0,0,0,0x80000000};
XMGLOBALCONST XMVECTORI32 g_XMFlipYZ = {0,0x80000000,0x80000000,0};
XMGLOBALCONST XMVECTORI32 g_XMFlipZW = {0,0,0x80000000,0x80000000};
XMGLOBALCONST XMVECTORI32 g_XMFlipYW = {0,0x80000000,0,0x80000000};
XMGLOBALCONST XMVECTORU32 g_XMFlipY = {0,0x80000000,0,0};
XMGLOBALCONST XMVECTORU32 g_XMFlipZ = {0,0,0x80000000,0};
XMGLOBALCONST XMVECTORU32 g_XMFlipW = {0,0,0,0x80000000};
XMGLOBALCONST XMVECTORU32 g_XMFlipYZ = {0,0x80000000,0x80000000,0};
XMGLOBALCONST XMVECTORU32 g_XMFlipZW = {0,0,0x80000000,0x80000000};
XMGLOBALCONST XMVECTORU32 g_XMFlipYW = {0,0x80000000,0,0x80000000};
XMGLOBALCONST XMVECTORI32 g_XMMaskDec4 = {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<30};
XMGLOBALCONST XMVECTORI32 g_XMXorDec4 = {0x200,0x200<<10,0x200<<20,0};
XMGLOBALCONST XMVECTORF32 g_XMAddUDec4 = {0,0,0,32768.0f*65536.0f};
XMGLOBALCONST XMVECTORF32 g_XMAddDec4 = {-512.0f,-512.0f*1024.0f,-512.0f*1024.0f*1024.0f,0};
XMGLOBALCONST XMVECTORF32 g_XMMulDec4 = {1.0f,1.0f/1024.0f,1.0f/(1024.0f*1024.0f),1.0f/(1024.0f*1024.0f*1024.0f)};
XMGLOBALCONST XMVECTORI32 g_XMMaskByte4 = {0xFF,0xFF00,0xFF0000,0xFF000000};
XMGLOBALCONST XMVECTORU32 g_XMMaskByte4 = {0xFF,0xFF00,0xFF0000,0xFF000000};
XMGLOBALCONST XMVECTORI32 g_XMXorByte4 = {0x80,0x8000,0x800000,0x00000000};
XMGLOBALCONST XMVECTORF32 g_XMAddByte4 = {-128.0f,-128.0f*256.0f,-128.0f*65536.0f,0};
XMGLOBALCONST XMVECTORF32 g_XMFixUnsigned = {32768.0f*65536.0f,32768.0f*65536.0f,32768.0f*65536.0f,32768.0f*65536.0f};
@ -1798,10 +1757,10 @@ XMGLOBALCONST XMVECTORI32 g_XMExponentBias = {127, 127, 127, 127};
XMGLOBALCONST XMVECTORI32 g_XMSubnormalExponent = {-126, -126, -126, -126};
XMGLOBALCONST XMVECTORI32 g_XMNumTrailing = {23, 23, 23, 23};
XMGLOBALCONST XMVECTORI32 g_XMMinNormal = {0x00800000, 0x00800000, 0x00800000, 0x00800000};
XMGLOBALCONST XMVECTORI32 g_XMNegInfinity = {0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000};
XMGLOBALCONST XMVECTORI32 g_XMNegQNaN = {0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000};
XMGLOBALCONST XMVECTORU32 g_XMNegInfinity = {0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000};
XMGLOBALCONST XMVECTORU32 g_XMNegQNaN = {0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000};
XMGLOBALCONST XMVECTORI32 g_XMBin128 = {0x43000000, 0x43000000, 0x43000000, 0x43000000};
XMGLOBALCONST XMVECTORI32 g_XMBinNeg150 = {0xC3160000, 0xC3160000, 0xC3160000, 0xC3160000};
XMGLOBALCONST XMVECTORU32 g_XMBinNeg150 = {0xC3160000, 0xC3160000, 0xC3160000, 0xC3160000};
XMGLOBALCONST XMVECTORI32 g_XM253 = {253, 253, 253, 253};
XMGLOBALCONST XMVECTORF32 g_XMExpEst1 = {-6.93147182e-1f, -6.93147182e-1f, -6.93147182e-1f, -6.93147182e-1f};
XMGLOBALCONST XMVECTORF32 g_XMExpEst2 = {+2.40226462e-1f, +2.40226462e-1f, +2.40226462e-1f, +2.40226462e-1f};
@ -1835,8 +1794,6 @@ XMGLOBALCONST XMVECTORF32 g_XMInvLgE = {+6.93147182e-1f, +6.93147182e
//------------------------------------------------------------------------------
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
inline XMVECTOR XM_CALLCONV XMVectorSetBinaryConstant(uint32_t C0, uint32_t C1, uint32_t C2, uint32_t C3)
{
#if defined(_XM_NO_INTRINSICS_)
@ -1926,15 +1883,11 @@ inline XMVECTOR XM_CALLCONV XMVectorSplatConstantInt(int32_t IntConstant)
#endif
}
// Implemented for VMX128 intrinsics as #defines aboves
#endif // _XM_NO_INTRINSICS_ || _XM_SSE_INTRINSICS_ || _XM_ARM_NEON_INTRINSICS_
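XMVectorSetBinaryConstant builds a vector whose lanes are 0.0f or 1.0f from the four flags, and is now an ordinary inline on every remaining code path. Usage sketch:

// Produces { 0.0f, 1.0f, 0.0f, 1.0f }.
XMVECTOR v = XMVectorSetBinaryConstant( 0, 1, 0, 1 );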
#include "DirectXMathConvert.inl"
#include "DirectXMathVector.inl"
#include "DirectXMathMatrix.inl"
#include "DirectXMathMisc.inl"
#pragma prefast(pop)
#pragma warning(pop)

View File

@ -21,9 +21,6 @@
//------------------------------------------------------------------------------
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
// For VMX128, these routines are all defines in the main header
#pragma warning(push)
#pragma warning(disable:4701) // Prevent warnings about 'Result' potentially being used without having been initialized
@ -218,8 +215,6 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToUInt
#pragma warning(pop)
#endif // _XM_NO_INTRINSICS_ || _XM_SSE_INTRINSICS_ || _XM_ARM_NEON_INTRINSICS_
/****************************************************************************
*
* Vector and matrix load operations
@ -243,8 +238,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt(const uint32_t* pSource)
return vld1q_lane_u32( pSource, zero, 0 );
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_load_ss( reinterpret_cast<const float*>(pSource) );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -264,8 +258,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat(const float* pSource)
return vld1q_lane_f32( pSource, zero, 0 );
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_load_ss( pSource );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -291,8 +284,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt2
__m128 x = _mm_load_ss( reinterpret_cast<const float*>(pSource) );
__m128 y = _mm_load_ss( reinterpret_cast<const float*>(pSource+1) );
return _mm_unpacklo_ps( x, y );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -318,8 +310,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt2A
#elif defined(_XM_SSE_INTRINSICS_)
__m128i V = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pSource) );
return _mm_castsi128_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -345,8 +336,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat2
__m128 x = _mm_load_ss( &pSource->x );
__m128 y = _mm_load_ss( &pSource->y );
return _mm_unpacklo_ps( x, y );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -372,8 +362,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat2A
#elif defined(_XM_SSE_INTRINSICS_)
__m128i V = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pSource) );
return _mm_castsi128_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -401,8 +390,7 @@ inline XMVECTOR XM_CALLCONV XMLoadSInt2
__m128 y = _mm_load_ss( reinterpret_cast<const float*>(&pSource->y) );
__m128 V = _mm_unpacklo_ps( x, y );
return _mm_cvtepi32_ps(_mm_castps_si128(V));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -442,8 +430,7 @@ inline XMVECTOR XM_CALLCONV XMLoadUInt2
vMask = _mm_and_ps(_mm_castsi128_ps(iMask),g_XMFixUnsigned);
vResult = _mm_add_ps(vResult,vMask);
return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -472,8 +459,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt3
__m128 z = _mm_load_ss( reinterpret_cast<const float*>(pSource+2) );
__m128 xy = _mm_unpacklo_ps( x, y );
return _mm_movelh_ps( xy, z );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -501,8 +487,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt3A
__m128i V = _mm_load_si128( reinterpret_cast<const __m128i*>(pSource) );
V = _mm_and_si128( V, g_XMMask3 );
return _mm_castsi128_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -531,8 +516,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat3
__m128 z = _mm_load_ss( &pSource->z );
__m128 xy = _mm_unpacklo_ps( x, y );
return _mm_movelh_ps( xy, z );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -559,8 +543,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat3A
// Reads an extra float which is zero'd
__m128 V = _mm_load_ps( &pSource->x );
return _mm_and_ps( V, g_XMMask3 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
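Round-trip sketch for the aligned load, not part of the commit (the source must be 16-byte aligned; names are illustrative):

XMFLOAT3A src( 1.0f, 2.0f, 3.0f );   // ...A types carry 16-byte alignment
XMVECTOR v = XMLoadFloat3A( &src );  // w lane comes back zeroed, per the comment above
XMFLOAT3A dst;
XMStoreFloat3A( &dst, v );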
//------------------------------------------------------------------------------
@ -593,8 +576,7 @@ inline XMVECTOR XM_CALLCONV XMLoadSInt3
__m128 xy = _mm_unpacklo_ps( x, y );
__m128 V = _mm_movelh_ps( xy, z );
return _mm_cvtepi32_ps(_mm_castps_si128(V));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -638,8 +620,7 @@ inline XMVECTOR XM_CALLCONV XMLoadUInt3
vResult = _mm_add_ps(vResult,vMask);
return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -663,8 +644,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt4
#elif defined(_XM_SSE_INTRINSICS_)
__m128i V = _mm_loadu_si128( reinterpret_cast<const __m128i*>(pSource) );
return _mm_castsi128_ps(V);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -688,8 +668,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt4A
#elif defined(_XM_SSE_INTRINSICS_)
__m128i V = _mm_load_si128( reinterpret_cast<const __m128i*>(pSource) );
return _mm_castsi128_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -711,8 +690,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat4
return vld1q_f32( reinterpret_cast<const float*>(pSource) );
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_loadu_ps( &pSource->x );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -735,8 +713,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat4A
return vld1q_f32_ex( reinterpret_cast<const float*>(pSource), 128 );
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_load_ps( &pSource->x );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -762,8 +739,7 @@ inline XMVECTOR XM_CALLCONV XMLoadSInt4
#elif defined(_XM_SSE_INTRINSICS_)
__m128i V = _mm_loadu_si128( reinterpret_cast<const __m128i*>(pSource) );
return _mm_cvtepi32_ps(V);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -799,8 +775,7 @@ inline XMVECTOR XM_CALLCONV XMLoadUInt4
vMask = _mm_and_ps(_mm_castsi128_ps(iMask),g_XMFixUnsigned);
vResult = _mm_add_ps(vResult,vMask);
return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -865,8 +840,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat3x3
M.r[2] = _mm_shuffle_ps( V2, V3, _MM_SHUFFLE(1, 0, 3, 2) );
M.r[3] = g_XMIdentityR3;
return M;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -946,8 +920,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3
vTemp3,
_mm_castsi128_ps(vTemp4i));
return M;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1028,8 +1001,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A
vTemp3,
_mm_castsi128_ps(vTemp4i));
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1078,8 +1050,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x4
M.r[2] = _mm_loadu_ps( &pSource->_31 );
M.r[3] = _mm_loadu_ps( &pSource->_41 );
return M;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1129,8 +1100,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x4A
M.r[2] = _mm_load_ps( &pSource->_31 );
M.r[3] = _mm_load_ps( &pSource->_41 );
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
/****************************************************************************
@ -1149,11 +1119,10 @@ inline void XM_CALLCONV XMStoreInt
#if defined(_XM_NO_INTRINSICS_)
*pDestination = XMVectorGetIntX( V );
#elif defined(_XM_ARM_NEON_INTRINSICS_)
vst1q_lane_u32( pDestination, V, 0 );
vst1q_lane_u32( pDestination, *reinterpret_cast<const uint32x4_t*>(&V), 0 );
#elif defined(_XM_SSE_INTRINSICS_)
_mm_store_ss( reinterpret_cast<float*>(pDestination), V );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1171,8 +1140,7 @@ inline void XM_CALLCONV XMStoreFloat
vst1q_lane_f32( pDestination, V, 0 );
#elif defined(_XM_SSE_INTRINSICS_)
_mm_store_ss( pDestination, V );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1194,8 +1162,7 @@ inline void XM_CALLCONV XMStoreInt2
XMVECTOR T = XM_PERMUTE_PS( V, _MM_SHUFFLE( 1, 1, 1, 1 ) );
_mm_store_ss( reinterpret_cast<float*>(&pDestination[0]), V );
_mm_store_ss( reinterpret_cast<float*>(&pDestination[1]), T );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1216,8 +1183,7 @@ inline void XM_CALLCONV XMStoreInt2A
vst1_u32_ex( pDestination, VL, 64 );
#elif defined(_XM_SSE_INTRINSICS_)
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1239,8 +1205,7 @@ inline void XM_CALLCONV XMStoreFloat2
XMVECTOR T = XM_PERMUTE_PS( V, _MM_SHUFFLE( 1, 1, 1, 1 ) );
_mm_store_ss( &pDestination->x, V );
_mm_store_ss( &pDestination->y, T );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1261,8 +1226,7 @@ inline void XM_CALLCONV XMStoreFloat2A
vst1_f32_ex( reinterpret_cast<float*>(pDestination), VL, 64 );
#elif defined(_XM_SSE_INTRINSICS_)
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1294,8 +1258,7 @@ inline void XM_CALLCONV XMStoreSInt2
XMVECTOR T = XM_PERMUTE_PS( vOverflow, _MM_SHUFFLE( 1, 1, 1, 1 ) );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->x), vOverflow );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->y), T );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1336,8 +1299,7 @@ inline void XM_CALLCONV XMStoreUInt2
XMVECTOR T = XM_PERMUTE_PS( vResult, _MM_SHUFFLE( 1, 1, 1, 1 ) );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->x), vResult );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->y), T );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1356,15 +1318,14 @@ inline void XM_CALLCONV XMStoreInt3
#elif defined(_XM_ARM_NEON_INTRINSICS_)
uint32x2_t VL = vget_low_u32(V);
vst1_u32( pDestination, VL );
vst1q_lane_u32( pDestination+2, V, 2 );
vst1q_lane_u32( pDestination+2, *reinterpret_cast<const uint32x4_t*>(&V), 2 );
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR T1 = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
XMVECTOR T2 = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
_mm_store_ss( reinterpret_cast<float*>(pDestination), V );
_mm_store_ss( reinterpret_cast<float*>(&pDestination[1]), T1 );
_mm_store_ss( reinterpret_cast<float*>(&pDestination[2]), T2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1384,13 +1345,12 @@ inline void XM_CALLCONV XMStoreInt3A
#elif defined(_XM_ARM_NEON_INTRINSICS_)
uint32x2_t VL = vget_low_u32(V);
vst1_u32_ex( pDestination, VL, 64 );
vst1q_lane_u32( pDestination+2, V, 2 );
vst1q_lane_u32( pDestination+2, *reinterpret_cast<const uint32x4_t*>(&V), 2 );
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR T = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
_mm_store_ss( reinterpret_cast<float*>(&pDestination[2]), T );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1416,8 +1376,7 @@ inline void XM_CALLCONV XMStoreFloat3
_mm_store_ss( &pDestination->x, V );
_mm_store_ss( &pDestination->y, T1 );
_mm_store_ss( &pDestination->z, T2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1442,8 +1401,7 @@ inline void XM_CALLCONV XMStoreFloat3A
XMVECTOR T = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
_mm_store_ss( &pDestination->z, T );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1479,8 +1437,7 @@ inline void XM_CALLCONV XMStoreSInt3
_mm_store_ss( reinterpret_cast<float*>(&pDestination->x), vOverflow );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->y), T1 );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->z), T2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1525,8 +1482,7 @@ inline void XM_CALLCONV XMStoreUInt3
_mm_store_ss( reinterpret_cast<float*>(&pDestination->x), vResult );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->y), T1 );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->z), T2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1547,8 +1503,7 @@ inline void XM_CALLCONV XMStoreInt4
vst1q_u32( pDestination, V );
#elif defined(_XM_SSE_INTRINSICS_)
_mm_storeu_si128( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1570,11 +1525,9 @@ inline void XM_CALLCONV XMStoreInt4A
vst1q_u32_ex( pDestination, V, 128 );
#elif defined(_XM_SSE_INTRINSICS_)
_mm_store_si128( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
_Use_decl_annotations_
inline void XM_CALLCONV XMStoreFloat4
@ -1593,8 +1546,7 @@ inline void XM_CALLCONV XMStoreFloat4
vst1q_f32( reinterpret_cast<float*>(pDestination), V );
#elif defined(_XM_SSE_INTRINSICS_)
_mm_storeu_ps( &pDestination->x, V );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1616,11 +1568,9 @@ inline void XM_CALLCONV XMStoreFloat4A
vst1q_f32_ex( reinterpret_cast<float*>(pDestination), V, 128 );
#elif defined(_XM_SSE_INTRINSICS_)
_mm_store_ps( &pDestination->x, V );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
_Use_decl_annotations_
inline void XM_CALLCONV XMStoreSInt4
@ -1648,8 +1598,7 @@ inline void XM_CALLCONV XMStoreSInt4
vOverflow = _mm_andnot_ps(vOverflow,_mm_castsi128_ps(vResulti));
vOverflow = _mm_or_ps(vOverflow,vResult);
_mm_storeu_si128( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(vOverflow) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1688,8 +1637,7 @@ inline void XM_CALLCONV XMStoreUInt4
// On those that are too large, set to 0xFFFFFFFF
vResult = _mm_or_ps(vResult,vOverflow);
_mm_storeu_si128( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(vResult) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1736,11 +1684,9 @@ inline void XM_CALLCONV XMStoreFloat3x3
_mm_storeu_ps(&pDestination->m[1][1],vTemp2);
vTemp3 = XM_PERMUTE_PS(vTemp3,_MM_SHUFFLE(2,2,2,2));
_mm_store_ss(&pDestination->m[2][2],vTemp3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
_Use_decl_annotations_
inline void XM_CALLCONV XMStoreFloat4x3
@ -1793,8 +1739,7 @@ inline void XM_CALLCONV XMStoreFloat4x3
_mm_storeu_ps(&pDestination->m[0][0],vTemp1);
_mm_storeu_ps(&pDestination->m[1][1],vTemp2x);
_mm_storeu_ps(&pDestination->m[2][2],vTemp3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1860,11 +1805,9 @@ inline void XM_CALLCONV XMStoreFloat4x3A
_mm_store_ps(&pDestination->m[0][0],vTemp1);
_mm_store_ps(&pDestination->m[1][1],vTemp2);
_mm_store_ps(&pDestination->m[2][2],vTemp3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
_Use_decl_annotations_
inline void XM_CALLCONV XMStoreFloat4x4
@ -1906,8 +1849,7 @@ inline void XM_CALLCONV XMStoreFloat4x4
_mm_storeu_ps( &pDestination->_21, M.r[1] );
_mm_storeu_ps( &pDestination->_31, M.r[2] );
_mm_storeu_ps( &pDestination->_41, M.r[3] );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1952,7 +1894,6 @@ inline void XM_CALLCONV XMStoreFloat4x4A
_mm_store_ps( &pDestination->_21, M.r[1] );
_mm_store_ps( &pDestination->_31, M.r[2] );
_mm_store_ps( &pDestination->_41, M.r[3] );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}

View File

@ -148,8 +148,7 @@ inline bool XM_CALLCONV XMMatrixIsInfinite
vTemp1 = _mm_or_ps(vTemp1,vTemp3);
// If any are infinity, the signs are true.
return (_mm_movemask_ps(vTemp1)!=0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -210,8 +209,7 @@ inline bool XM_CALLCONV XMMatrixIsIdentity
vTemp3 = _mm_and_ps(vTemp3,vTemp4);
vTemp1 = _mm_and_ps(vTemp1,vTemp3);
return (_mm_movemask_ps(vTemp1)==0x0f);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -357,8 +355,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply
vX = _mm_add_ps(vX,vY);
mResult.r[3] = vX;
return mResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -530,8 +527,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
// x.w,y.w,z.w,w.w
mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
return mResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -596,8 +592,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixTranspose
// x.w,y.w,z.w,w.w
mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
return mResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -817,8 +812,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixInverse
mResult.r[2] = _mm_mul_ps(C4,vTemp);
mResult.r[3] = _mm_mul_ps(C6,vTemp);
return mResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -828,8 +822,6 @@ inline XMVECTOR XM_CALLCONV XMMatrixDeterminant
FXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
static const XMVECTORF32 Sign = {1.0f, -1.0f, 1.0f, -1.0f};
XMVECTOR V0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
@ -864,9 +856,6 @@ inline XMVECTOR XM_CALLCONV XMMatrixDeterminant
R = XMVectorMultiplyAdd(V2, P2, R);
return XMVector4Dot(S, R);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
#define XM3RANKDECOMPOSE(a, b, c, x, y, z) \
@ -1030,17 +1019,12 @@ inline bool XM_CALLCONV XMMatrixDecompose
inline XMMATRIX XM_CALLCONV XMMatrixIdentity()
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMMATRIX M;
M.r[0] = g_XMIdentityR0.v;
M.r[1] = g_XMIdentityR1.v;
M.r[2] = g_XMIdentityR2.v;
M.r[3] = g_XMIdentityR3.v;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -1108,8 +1092,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixTranslation
M.r[2] = g_XMIdentityR2.v;
M.r[3] = XMVectorSet(OffsetX, OffsetY, OffsetZ, 1.f );
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
@ -1151,8 +1134,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixTranslationFromVector
M.r[2] = g_XMIdentityR2.v;
M.r[3] = XMVectorSelect( g_XMIdentityR3.v, Offset, g_XMSelect1110.v );
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1203,8 +1185,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixScaling
M.r[2] = _mm_set_ps( 0, ScaleZ, 0, 0 );
M.r[3] = g_XMIdentityR3.v;
return M;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1252,8 +1233,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixScalingFromVector
M.r[2] = _mm_and_ps(Scale,g_XMMaskZ);
M.r[3] = g_XMIdentityR3.v;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1329,8 +1309,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixRotationX
M.r[2] = vCos;
M.r[3] = g_XMIdentityR3;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1406,8 +1385,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixRotationY
M.r[0] = vSin;
M.r[3] = g_XMIdentityR3;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1483,8 +1461,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixRotationZ
M.r[2] = g_XMIdentityR2;
M.r[3] = g_XMIdentityR3;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1598,8 +1575,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixRotationNormal
M.r[2] = V2;
M.r[3] = g_XMIdentityR3.v;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1613,13 +1589,8 @@ inline XMMATRIX XM_CALLCONV XMMatrixRotationAxis
assert(!XMVector3Equal(Axis, XMVectorZero()));
assert(!XMVector3IsInfinite(Axis));
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR Normal = XMVector3Normalize(Axis);
return XMMatrixRotationNormal(Normal, Angle);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
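As the surviving body shows, XMMatrixRotationAxis normalizes the axis and defers to XMMatrixRotationNormal. Usage sketch:

// Rotate 90 degrees about an arbitrary, non-normalized axis.
XMMATRIX m = XMMatrixRotationAxis( XMVectorSet( 1.0f, 1.0f, 0.0f, 0.0f ), XM_PIDIV2 );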
//------------------------------------------------------------------------------
@ -1705,8 +1676,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixRotationQuaternion
M.r[2] = Q1;
M.r[3] = g_XMIdentityR3;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1849,8 +1819,6 @@ inline XMMATRIX XM_CALLCONV XMMatrixReflect
assert(!XMVector3Equal(ReflectionPlane, XMVectorZero()));
assert(!XMPlaneIsInfinite(ReflectionPlane));
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
static const XMVECTORF32 NegativeTwo = {-2.0f, -2.0f, -2.0f, 0.0f};
XMVECTOR P = XMPlaneNormalize(ReflectionPlane);
@ -1867,9 +1835,6 @@ inline XMMATRIX XM_CALLCONV XMMatrixReflect
M.r[2] = XMVectorMultiplyAdd(C, S, g_XMIdentityR2.v);
M.r[3] = XMVectorMultiplyAdd(D, S, g_XMIdentityR3.v);
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -1947,8 +1912,6 @@ inline XMMATRIX XM_CALLCONV XMMatrixLookToLH
assert(!XMVector3Equal(UpDirection, XMVectorZero()));
assert(!XMVector3IsInfinite(UpDirection));
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR R2 = XMVector3Normalize(EyeDirection);
XMVECTOR R0 = XMVector3Cross(UpDirection, R2);
@ -1971,9 +1934,6 @@ inline XMMATRIX XM_CALLCONV XMMatrixLookToLH
M = XMMatrixTranspose(M);
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -1991,6 +1951,9 @@ inline XMMATRIX XM_CALLCONV XMMatrixLookToRH
//------------------------------------------------------------------------------
#pragma prefast(push)
#pragma prefast(disable:28931, "PREfast noise: Esp:1266")
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveLH
(
float ViewWidth,
@ -2073,8 +2036,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveLH
M.r[3] = vTemp;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2161,8 +2123,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveRH
vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
M.r[3] = vTemp;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2263,8 +2224,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovLH
vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
M.r[3] = vTemp;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
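Typical construction through the projection helper patched above (values illustrative):

// 60-degree vertical FOV, 16:9 aspect ratio, near plane 0.1, far plane 100.
XMMATRIX proj = XMMatrixPerspectiveFovLH( XM_PI / 3.0f, 16.0f / 9.0f, 0.1f, 100.0f );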
//------------------------------------------------------------------------------
@ -2363,8 +2323,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovRH
vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
M.r[3] = vTemp;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2461,8 +2420,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterLH
vValues = _mm_and_ps(vValues,g_XMMaskZ);
M.r[3] = vValues;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2559,8 +2517,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterRH
vValues = _mm_and_ps(vValues,g_XMMaskZ);
M.r[3] = vValues;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2644,8 +2601,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixOrthographicLH
vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
M.r[3] = vTemp;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2729,8 +2685,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixOrthographicRH
vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
M.r[3] = vTemp;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2830,8 +2785,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterLH
vValues = _mm_mul_ps(vValues,rMem2);
M.r[3] = vValues;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2931,10 +2885,10 @@ inline XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterRH
vValues = _mm_mul_ps(vValues,rMem2);
M.r[3] = vValues;
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
#pragma prefast(pop)
/****************************************************************************
*
@ -3057,8 +3011,7 @@ inline XMMATRIX& XMMATRIX::operator/= (float S)
r[2] = _mm_div_ps( r[2], vS );
r[3] = _mm_div_ps( r[3], vS );
return *this;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -3139,8 +3092,7 @@ inline XMMATRIX XMMATRIX::operator/ (float S) const
R.r[2] = _mm_div_ps( r[2], vS );
R.r[3] = _mm_div_ps( r[3], vS );
return R;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------

View File

@ -72,10 +72,7 @@ inline bool XM_CALLCONV XMQuaternionIsIdentity
FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
return XMVector4Equal(Q, g_XMIdentityR3.v);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -182,8 +179,7 @@ inline XMVECTOR XM_CALLCONV XMQuaternionMultiply
Q2Y = _mm_add_ps(Q2Y,Q2Z);
vResult = _mm_add_ps(vResult,Q2Y);
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
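Worth recalling when reading this function: XMQuaternionMultiply(Q1, Q2) returns the product Q2*Q1, the quaternion that applies Q1's rotation first and Q2's second. A short usage sketch, with an illustrative helper name:

#include <DirectXMath.h>
using namespace DirectX;

// Rotate 90 degrees about X, then 90 degrees about Y; the first
// argument is the rotation applied first.
inline XMVECTOR XM_CALLCONV ComposeRotations()
{
    XMVECTOR rotX = XMQuaternionRotationRollPitchYaw(XM_PIDIV2, 0.0f, 0.0f);
    XMVECTOR rotY = XMQuaternionRotationRollPitchYaw(0.0f, XM_PIDIV2, 0.0f);
    return XMQuaternionMultiply(rotX, rotY);
}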
//------------------------------------------------------------------------------
@ -257,8 +253,7 @@ inline XMVECTOR XM_CALLCONV XMQuaternionConjugate
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
return _mm_mul_ps(Q,NegativeOne3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -268,8 +263,6 @@ inline XMVECTOR XM_CALLCONV XMQuaternionInverse
FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
const XMVECTOR Zero = XMVectorZero();
XMVECTOR L = XMVector4LengthSq(Q);
@ -282,9 +275,6 @@ inline XMVECTOR XM_CALLCONV XMQuaternionInverse
Result = XMVectorSelect(Result, Zero, Control);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -294,8 +284,6 @@ inline XMVECTOR XM_CALLCONV XMQuaternionLn
FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
XMVECTOR QW = XMVectorSplatW(Q);
@ -312,9 +300,6 @@ inline XMVECTOR XM_CALLCONV XMQuaternionLn
Result = XMVectorSelect(Q0, Result, ControlW);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -324,8 +309,6 @@ inline XMVECTOR XM_CALLCONV XMQuaternionExp
FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR Theta = XMVector3Length(Q);
XMVECTOR SinTheta, CosTheta;
@ -342,9 +325,6 @@ inline XMVECTOR XM_CALLCONV XMQuaternionExp
Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -418,8 +398,8 @@ inline XMVECTOR XM_CALLCONV XMQuaternionSlerpV
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};
static const XMVECTORU32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
static const XMVECTORU32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};
XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);
@ -456,8 +436,7 @@ inline XMVECTOR XM_CALLCONV XMQuaternionSlerpV
S1 = _mm_mul_ps(S1, Q1);
Result = _mm_add_ps(Result,S1);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
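SignMask2 and MaskXY move from XMVECTORI32 to XMVECTORU32 because they hold raw bit patterns; values such as 0x80000000 and 0xFFFFFFFF overflow a signed int32_t initializer. A minimal sketch of declaring and applying such a mask, with illustrative names:

#include <DirectXMath.h>
using namespace DirectX;

// Bit-pattern constants belong in XMVECTORU32, not XMVECTORI32.
static const XMVECTORU32 KeepXYMask = {0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000};

// Keep the x and y lanes of V, zero z and w.
inline XMVECTOR XM_CALLCONV KeepXY(FXMVECTOR V)
{
    return XMVectorAndInt(V, KeepXYMask);
}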
//------------------------------------------------------------------------------
@ -633,10 +612,7 @@ inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentricV
inline XMVECTOR XM_CALLCONV XMQuaternionIdentity()
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
return g_XMIdentityR3.v;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -660,8 +636,6 @@ inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYawFromVector
FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
static const XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f};
XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
@ -683,9 +657,6 @@ inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYawFromVector
XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0);
return Q;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -717,8 +688,7 @@ inline XMVECTOR XM_CALLCONV XMQuaternionRotationNormal
Scale = _mm_or_ps(Scale,vCosine);
N = _mm_mul_ps(N,Scale);
return N;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -732,12 +702,9 @@ inline XMVECTOR XM_CALLCONV XMQuaternionRotationAxis
assert(!XMVector3Equal(Axis, XMVectorZero()));
assert(!XMVector3IsInfinite(Axis));
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR Normal = XMVector3Normalize(Axis);
XMVECTOR Q = XMQuaternionRotationNormal(Normal, Angle);
return Q;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
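Since XMQuaternionRotationAxis only normalizes its axis and forwards to XMQuaternionRotationNormal, callers that already hold a unit axis can skip the redundant normalize. A sketch, with an illustrative helper name:

#include <DirectXMath.h>
using namespace DirectX;

// The axis below is already unit length, so RotationNormal is safe
// to call directly.
inline XMVECTOR XM_CALLCONV QuarterTurnAboutZ()
{
    XMVECTOR unitZ = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
    return XMQuaternionRotationNormal(unitZ, XM_PIDIV2);
}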
//------------------------------------------------------------------------------
@ -981,8 +948,7 @@ inline XMVECTOR XM_CALLCONV XMQuaternionRotationMatrix
// the quaternion).
t0 = XMVector4Length(t2);
return _mm_div_ps(t2, t0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1097,14 +1063,9 @@ inline XMVECTOR XM_CALLCONV XMPlaneDotCoord
{
// Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
XMVECTOR Result = XMVector4Dot(P, V3);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -1150,8 +1111,7 @@ inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst
// Get the reciprocal
vDot = _mm_mul_ps(vDot,P);
return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1197,8 +1157,7 @@ inline XMVECTOR XM_CALLCONV XMPlaneNormalize
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult,vLengthSq);
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1210,8 +1169,6 @@ inline XMVECTOR XM_CALLCONV XMPlaneIntersectLine
FXMVECTOR LinePoint2
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR V1 = XMVector3Dot(P, LinePoint1);
XMVECTOR V2 = XMVector3Dot(P, LinePoint2);
XMVECTOR D = XMVectorSubtract(V1, V2);
@ -1226,9 +1183,6 @@ inline XMVECTOR XM_CALLCONV XMPlaneIntersectLine
XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
return XMVectorSelect(Point, g_XMQNaN.v, Control);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
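XMPlaneIntersectLine evaluates the plane at both endpoints and interpolates: the result is LinePoint1 + t*(LinePoint2 - LinePoint1) with t = PlaneDotCoord(P, LinePoint1) / (Vector3Dot(P, LinePoint1) - Vector3Dot(P, LinePoint2)), and QNaN is returned when the line is parallel to the plane. A usage sketch against the ground plane z = 0, with an illustrative helper name:

#include <DirectXMath.h>
using namespace DirectX;

// Intersect the plane z = 0 (normal +Z, d = 0) with a vertical
// segment; the expected result is the point (1, 2, 0).
inline XMVECTOR XM_CALLCONV HitGroundPlane()
{
    XMVECTOR plane = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
    XMVECTOR p1 = XMVectorSet(1.0f, 2.0f, 5.0f, 0.0f);
    XMVECTOR p2 = XMVectorSet(1.0f, 2.0f, -5.0f, 0.0f);
    return XMPlaneIntersectLine(plane, p1, p2);
}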
//------------------------------------------------------------------------------
@ -1243,7 +1197,6 @@ inline void XM_CALLCONV XMPlaneIntersectPlane
{
assert(pLinePoint1);
assert(pLinePoint2);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR V1 = XMVector3Cross(P2, P1);
@ -1266,9 +1219,6 @@ inline void XM_CALLCONV XMPlaneIntersectPlane
XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
*pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
*pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -1279,8 +1229,6 @@ inline XMVECTOR XM_CALLCONV XMPlaneTransform
FXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR W = XMVectorSplatW(P);
XMVECTOR Z = XMVectorSplatZ(P);
XMVECTOR Y = XMVectorSplatY(P);
@ -1291,9 +1239,6 @@ inline XMVECTOR XM_CALLCONV XMPlaneTransform
Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
Result = XMVectorMultiplyAdd(X, M.r[0], Result);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -1342,8 +1287,6 @@ inline XMVECTOR XM_CALLCONV XMPlaneFromPoints
FXMVECTOR Point3
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR V21 = XMVectorSubtract(Point1, Point2);
XMVECTOR V31 = XMVectorSubtract(Point1, Point3);
@ -1356,9 +1299,6 @@ inline XMVECTOR XM_CALLCONV XMPlaneFromPoints
XMVECTOR Result = XMVectorSelect(D, N, g_XMSelect1110.v);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
/****************************************************************************
@ -1484,8 +1424,7 @@ inline XMVECTOR XM_CALLCONV XMColorNegative
XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3);
// Add 1,1,1,0 to -x,-y,-z,w
return _mm_add_ps(vTemp,g_XMOne3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1514,13 +1453,12 @@ inline XMVECTOR XM_CALLCONV XMColorAdjustSaturation
const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
float fLuminance = (vColor.vector4_f32[0]*gvLuminance.f[0])+(vColor.vector4_f32[1]*gvLuminance.f[1])+(vColor.vector4_f32[2]*gvLuminance.f[2]);
XMVECTORF32 vResult = {
((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance,
((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance,
((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance,
vColor.vector4_f32[3]};
return vResult.v;
XMVECTOR vResult;
vResult.vector4_f32[0] = ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance;
vResult.vector4_f32[1] = ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance;
vResult.vector4_f32[2] = ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance;
vResult.vector4_f32[3] = vColor.vector4_f32[3];
return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance );
@ -1540,8 +1478,7 @@ inline XMVECTOR XM_CALLCONV XMColorAdjustSaturation
vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
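All three paths compute result = (color - luminance) * saturation + luminance per channel, using the luminance weights (0.2125, 0.7154, 0.0721) and passing alpha through. A usage sketch, with an illustrative helper name:

#include <DirectXMath.h>
using namespace DirectX;

// Saturation 0 yields the luminance gray, 1 returns the input;
// 0.5 desaturates halfway. Alpha is preserved.
inline XMVECTOR XM_CALLCONV HalfSaturate(FXMVECTOR color)
{
    return XMColorAdjustSaturation(color, 0.5f);
}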
//------------------------------------------------------------------------------
@ -1575,8 +1512,7 @@ inline XMVECTOR XM_CALLCONV XMColorAdjustContrast
vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2135,8 +2071,7 @@ inline XMVECTOR XM_CALLCONV XMFresnelTerm
vResult = _mm_max_ps(vResult,g_XMZero);
vResult = _mm_min_ps(vResult,g_XMOne);
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2447,7 +2382,7 @@ inline float XMScalarASin
{
omx = 0.0f;
}
float root = sqrt(omx);
float root = sqrtf(omx);
// 7-degree minimax approximation
float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f;
@ -2472,7 +2407,7 @@ inline float XMScalarASinEst
{
omx = 0.0f;
}
float root = sqrt(omx);
float root = sqrtf(omx);
// 3-degree minimax approximation
float result = ((-0.0187293f*x+0.0742610f)*x-0.2121144f)*x+1.5707288f;
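Replacing sqrt with sqrtf keeps these scalar helpers in single precision instead of promoting through double. Both functions rest on the identity asin(x) = pi/2 - sqrt(1 - |x|) * p(|x|), where p is a minimax polynomial; a self-contained sketch of the 3-degree estimate, mirroring the code above:

#include <cmath>

// Scalar asin estimate via acos(|x|) ~= sqrt(1 - |x|) * p(|x|),
// reflected for negative inputs; coefficients as in XMScalarASinEst.
inline float ASinEstSketch(float value)
{
    bool nonNegative = (value >= 0.0f);
    float x = fabsf(value);
    float omx = 1.0f - x;
    if (omx < 0.0f)
        omx = 0.0f;                  // guard against |value| > 1
    float root = sqrtf(omx);         // sqrtf keeps single precision
    float poly = ((-0.0187293f * x + 0.0742610f) * x - 0.2121144f) * x
        + 1.5707288f;
    float acosX = poly * root;       // approximates acos(|x|)
    const float pidiv2 = 1.5707963f;
    return nonNegative ? (pidiv2 - acosX) : (acosX - pidiv2);
}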

File diff suppressed because it is too large

View File

@ -21,11 +21,6 @@ namespace DirectX
namespace PackedVector
{
#ifdef _XM_BIGENDIAN_
#pragma bitfield_order(push)
#pragma bitfield_order(lsb_to_msb)
#endif
#pragma warning(push)
#pragma warning(disable:4201 4365 4324)
@ -864,13 +859,8 @@ struct XMU555
XMU555& operator= (uint16_t Packed) { v = Packed; return *this; }
};
#pragma warning(pop)
#ifdef _XM_BIGENDIAN_
#pragma bitfield_order(pop)
#endif
/****************************************************************************
*
@ -930,7 +920,6 @@ XMVECTOR XM_CALLCONV XMLoadUByte4(_In_ const XMUBYTE4* pSource);
XMVECTOR XM_CALLCONV XMLoadUNibble4(_In_ const XMUNIBBLE4* pSource);
XMVECTOR XM_CALLCONV XMLoadU555(_In_ const XMU555* pSource);
/****************************************************************************
*
* Store operations
@ -972,7 +961,6 @@ void XM_CALLCONV XMStoreUByte4(_Out_ XMUBYTE4* pDestination, _In_ FXMVECT
void XM_CALLCONV XMStoreUNibble4(_Out_ XMUNIBBLE4* pDestination, _In_ FXMVECTOR V);
void XM_CALLCONV XMStoreU555(_Out_ XMU555* pDestination, _In_ FXMVECTOR V);
/****************************************************************************
*
* Implementation

View File

@ -27,8 +27,6 @@ inline float PackedVector::XMConvertHalfToFloat
HALF Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
uint32_t Mantissa = (uint32_t)(Value & 0x03FF);
uint32_t Exponent = (Value & 0x7C00);
@ -63,11 +61,12 @@ inline float PackedVector::XMConvertHalfToFloat
(Mantissa << 13); // Mantissa
return reinterpret_cast<float*>(&Result)[0];
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif
}
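XMConvertHalfToFloat widens an IEEE half (1 sign, 5 exponent, 10 mantissa bits) by rebasing the exponent from bias 15 to bias 127 and renormalizing denormals. A usage sketch; 0x3C00 is the half bit pattern for 1.0:

#include <DirectXPackedVector.h>
using namespace DirectX::PackedVector;

// Widen one half-precision value; returns exactly 1.0f.
inline float WidenOne()
{
    HALF h = 0x3C00;
    return XMConvertHalfToFloat(h);
}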
//------------------------------------------------------------------------------
#pragma prefast(push)
#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307" )
_Use_decl_annotations_
inline float* PackedVector::XMConvertHalfToFloatStream
(
@ -80,7 +79,12 @@ inline float* PackedVector::XMConvertHalfToFloatStream
{
assert(pOutputStream);
assert(pInputStream);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
assert(InputStride >= sizeof(HALF));
_Analysis_assume_(InputStride >= sizeof(HALF));
assert(OutputStride >= sizeof(float));
_Analysis_assume_(OutputStride >= sizeof(float));
const uint8_t* pHalf = reinterpret_cast<const uint8_t*>(pInputStream);
uint8_t* pFloat = reinterpret_cast<uint8_t*>(pOutputStream);
@ -93,9 +97,6 @@ inline float* PackedVector::XMConvertHalfToFloatStream
}
return pOutputStream;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
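The new asserts, with matching _Analysis_assume_ hints for the static analyzer, require each stride to cover at least one element. For tightly packed data the strides are simply the element sizes, as in this sketch:

#include <DirectXPackedVector.h>
using namespace DirectX::PackedVector;

// Convert 64 tightly packed halves into 64 tightly packed floats.
inline void WidenHalves(const HALF (&src)[64], float (&dst)[64])
{
    XMConvertHalfToFloatStream(dst, sizeof(float), src, sizeof(HALF), 64);
}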
//------------------------------------------------------------------------------
@ -105,7 +106,6 @@ inline PackedVector::HALF PackedVector::XMConvertFloatToHalf
float Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
uint32_t Result;
uint32_t IValue = reinterpret_cast<uint32_t *>(&Value)[0];
@ -142,8 +142,6 @@ inline PackedVector::HALF PackedVector::XMConvertFloatToHalf
Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U)&0x7FFFU;
}
return (HALF)(Result|Sign);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif
}
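The (IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) term rounds the 13 dropped mantissa bits to nearest, ties to even, rather than truncating. A round-trip sketch:

#include <DirectXPackedVector.h>
using namespace DirectX::PackedVector;

// Values exactly representable in half precision survive the
// float -> half -> float round trip, e.g. f = 0.5f.
inline bool RoundTripsExactly(float f)
{
    HALF h = XMConvertFloatToHalf(f);
    return XMConvertHalfToFloat(h) == f;
}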
//------------------------------------------------------------------------------
@ -159,7 +157,12 @@ inline PackedVector::HALF* PackedVector::XMConvertFloatToHalfStream
{
assert(pOutputStream);
assert(pInputStream);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
assert(InputStride >= sizeof(float));
_Analysis_assume_(InputStride >= sizeof(float));
assert(OutputStride >= sizeof(HALF));
_Analysis_assume_(OutputStride >= sizeof(HALF));
const uint8_t* pFloat = reinterpret_cast<const uint8_t*>(pInputStream);
uint8_t* pHalf = reinterpret_cast<uint8_t*>(pOutputStream);
@ -171,16 +174,18 @@ inline PackedVector::HALF* PackedVector::XMConvertFloatToHalfStream
pHalf += OutputStride;
}
return pOutputStream;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
#pragma prefast(pop)
/****************************************************************************
*
* Vector and matrix load operations
*
****************************************************************************/
#pragma prefast(push)
#pragma prefast(disable:28931, "PREfast noise: Esp:1266")
_Use_decl_annotations_
inline XMVECTOR XM_CALLCONV PackedVector::XMLoadColor
(
@ -212,8 +217,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadColor
vTemp = _mm_add_ps(vTemp,g_XMFixAA8R8G8B8);
// Convert 0-255 to 0.0f-1.0f
return _mm_mul_ps(vTemp,g_XMNormalizeA8R8G8B8);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
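XMLoadColor expands one packed 32-bit ARGB value into a float4 with each channel normalized to [0, 1]. A usage sketch, with an illustrative helper name:

#include <DirectXPackedVector.h>
using namespace DirectX;
using namespace DirectX::PackedVector;

// 0xFF808080 is opaque mid-gray in packed ARGB; the load returns
// roughly (0.502, 0.502, 0.502, 1.0) in RGBA lane order.
inline XMVECTOR XM_CALLCONV LoadMidGray()
{
    XMCOLOR c(0xFF808080);
    return XMLoadColor(&c);
}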
//------------------------------------------------------------------------------
@ -224,7 +228,6 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadHalf2
)
{
assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTORF32 vResult = {
XMConvertHalfToFloat(pSource->x),
XMConvertHalfToFloat(pSource->y),
@ -232,8 +235,6 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadHalf2
0.0f
};
return vResult.v;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -268,8 +269,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadShortN2
vTemp = _mm_mul_ps(vTemp,g_XMNormalizeX16Y16);
// Clamp result (for case of -32768)
return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
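Signed-normalized loads divide by 32767, so the lone value -32768 would land just below -1.0; the final max against g_XMNegativeOne clamps it. The equivalent scalar rule, as a sketch:

#include <algorithm>
#include <cstdint>

// SNORM16 decode: divide by 32767 and clamp, so both -32768 and
// -32767 decode to exactly -1.0f.
inline float DecodeSNorm16(int16_t v)
{
    return std::max(static_cast<float>(v) / 32767.0f, -1.0f);
}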
//------------------------------------------------------------------------------
@ -302,8 +302,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadShort2
vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16);
// Y is 65536 times too large
return _mm_mul_ps(vTemp,g_XMFixupY16);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -339,8 +338,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUShortN2
// Y is 65536 times too large
vTemp = _mm_mul_ps(vTemp,FixupY16);
return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -375,8 +373,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUShort2
// y + 0x8000 to undo the signed order.
vTemp = _mm_add_ps(vTemp,FixaddY16);
return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -625,7 +622,6 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadHalf4
)
{
assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTORF32 vResult = {
XMConvertHalfToFloat(pSource->x),
XMConvertHalfToFloat(pSource->y),
@ -633,8 +629,6 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadHalf4
XMConvertHalfToFloat(pSource->w)
};
return vResult.v;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -676,8 +670,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadShortN4
vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
// Clamp result (for case of -32768)
return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -715,8 +708,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadShort4
vTemp = _mm_mul_ps(vTemp,g_XMFixupY16W16);
// Very important! The entries are x,z,y,w, flip it to x,y,z,w
return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -757,8 +749,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUShortN4
vTemp = _mm_mul_ps(vTemp,FixupY16W16);
// Very important! The entries are x,z,y,w, flip it to x,y,z,w
return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -797,8 +788,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUShort4
vTemp = _mm_add_ps(vTemp,FixaddY16W16);
// Very important! The entries are x,z,y,w, flip it to x,y,z,w
return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -838,8 +828,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadXDecN4
vTemp = _mm_mul_ps(vTemp,g_XMNormalizeA2B10G10R10);
// Clamp result (for case of -512)
return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -865,7 +854,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadXDec4
};
return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORI32 XDec4Xor = {0x200, 0x200<<10, 0x200<<20, 0x80000000};
static const XMVECTORU32 XDec4Xor = {0x200, 0x200<<10, 0x200<<20, 0x80000000};
static const XMVECTORF32 XDec4Add = {-512.0f,-512.0f*1024.0f,-512.0f*1024.0f*1024.0f,32768*65536.0f};
// Splat the color in all four entries
XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
@ -880,8 +869,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadXDec4
// Scale each bit-field back down to its integer value
vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
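The XDec4Xor/XDec4Add pair sign-extends each 10-bit field without integer shifts: XORing the field's sign bit rebases it to an offset encoding that converts to float exactly, and adding -512 (scaled per lane) restores the signed value. The same trick on a single field, in scalar form:

#include <cstdint>

// Sign-extend a 10-bit two's-complement field via the offset trick:
// field ^ 0x200 turns the sign bit into a +512 bias, which the final
// subtraction removes. E.g. 0x3FF decodes to -1, 0x1FF to 511.
inline int SignExtend10(uint32_t field)
{
    return static_cast<int>((field ^ 0x200u) & 0x3FFu) - 512;
}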
//------------------------------------------------------------------------------
@ -920,8 +908,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUDecN4
// Convert 0-1023 (0-3 for w) to 0.0f-1.0f
vTemp = _mm_mul_ps(vTemp,UDecN4Mul);
return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
@ -983,8 +970,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUDec4
// Scale each bit-field back down to its integer value
vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1027,8 +1013,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadDecN4
vTemp = _mm_mul_ps(vTemp,DecN4Mul);
// Clamp result (for case of -512/-1)
return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1069,8 +1054,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadDec4
// Scale each bit-field back down to its integer value
vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1104,8 +1088,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUByteN4
// Fix y, z and w because they are too large
vTemp = _mm_mul_ps(vTemp,LoadUByteN4Mul);
return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1139,8 +1122,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUByte4
// Fix y, z and w because they are too large
vTemp = _mm_mul_ps(vTemp,LoadUByte4Mul);
return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1175,8 +1157,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadByteN4
vTemp = _mm_mul_ps(vTemp,LoadByteN4Mul);
// Clamp result (for case of -128)
return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1210,8 +1191,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadByte4
// Fix y, z and w because they are too large
vTemp = _mm_mul_ps(vTemp,LoadByte4Mul);
return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1276,6 +1256,7 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadU555
#endif // !_XM_SSE_INTRINSICS_
}
#pragma prefast(pop)
/****************************************************************************
*
@ -1324,8 +1305,7 @@ inline void XM_CALLCONV PackedVector::XMStoreColor
vInt = _mm_packus_epi16(vInt,vInt);
// Store the color
_mm_store_ss(reinterpret_cast<float *>(&pDestination->c),_mm_castsi128_ps(vInt));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
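XMStoreColor is the inverse of XMLoadColor: scale each lane by 255, round, pack to bytes, and write one 32-bit ARGB value with alpha in the top byte. A usage sketch, with an illustrative helper name:

#include <DirectXPackedVector.h>
#include <cstdint>
using namespace DirectX;
using namespace DirectX::PackedVector;

// Pack opaque red; the stored dword is 0xFFFF0000 (A, R, G, B).
inline uint32_t PackRed()
{
    XMCOLOR c(0u);
    XMStoreColor(&c, XMVectorSet(1.0f, 0.0f, 0.0f, 1.0f));
    return c.c;
}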
//------------------------------------------------------------------------------
@ -1337,13 +1317,8 @@ inline void XM_CALLCONV PackedVector::XMStoreHalf2
)
{
assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
pDestination->x = XMConvertFloatToHalf(XMVectorGetX(V));
pDestination->y = XMConvertFloatToHalf(XMVectorGetY(V));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -1378,8 +1353,7 @@ inline void XM_CALLCONV PackedVector::XMStoreShortN2
__m128i vResulti = _mm_cvtps_epi32(vResult);
vResulti = _mm_packs_epi32(vResulti,vResulti);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->x),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1416,8 +1390,7 @@ inline void XM_CALLCONV PackedVector::XMStoreShort2
// Pack the ints into shorts
vInt = _mm_packs_epi32(vInt,vInt);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->x),_mm_castsi128_ps(vInt));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1455,8 +1428,7 @@ inline void XM_CALLCONV PackedVector::XMStoreUShortN2
// manually extract the values to store them to memory
pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0));
pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1492,8 +1464,7 @@ inline void XM_CALLCONV PackedVector::XMStoreUShort2
// manually extract the values to store them to memory
pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0));
pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1768,16 +1739,22 @@ inline void XM_CALLCONV PackedVector::XMStoreFloat3SE
union { float f; int32_t i; } fi;
fi.f = maxColor;
fi.i &= 0xFF800000; // cut off fraction
fi.i += 0x00004000; // round up leaving 9 bits in fraction (including assumed 1)
pDestination->e = (fi.i - 0x37800000) >> 23;
fi.i = 0x83000000 - fi.i;
float ScaleR = fi.f;
#ifdef _XM_NO_ROUNDF_
pDestination->xm = static_cast<uint32_t>( Internal::round_to_nearest(x * ScaleR) );
pDestination->ym = static_cast<uint32_t>( Internal::round_to_nearest(y * ScaleR) );
pDestination->zm = static_cast<uint32_t>( Internal::round_to_nearest(z * ScaleR) );
#else
pDestination->xm = static_cast<uint32_t>( roundf(x * ScaleR) );
pDestination->ym = static_cast<uint32_t>( roundf(y * ScaleR) );
pDestination->zm = static_cast<uint32_t>( roundf(z * ScaleR) );
#endif
}
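The new _XM_NO_ROUNDF_ branch covers toolchains whose CRT lacks roundf, substituting the library's Internal::round_to_nearest. For reference, a hedged stand-in for roundf itself (round half away from zero), not taken from the library:

#include <cmath>

// Minimal roundf substitute: round half away from zero, matching the
// C99 roundf contract for ordinary inputs. A sketch only; it does not
// handle every floating-point edge case.
inline float RoundfFallback(float x)
{
    return (x >= 0.0f) ? floorf(x + 0.5f) : ceilf(x - 0.5f);
}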
//------------------------------------------------------------------------------
@ -1789,8 +1766,6 @@ inline void XM_CALLCONV PackedVector::XMStoreHalf4
)
{
assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMFLOAT4A t;
XMStoreFloat4A(&t, V );
@ -1798,9 +1773,6 @@ inline void XM_CALLCONV PackedVector::XMStoreHalf4
pDestination->y = XMConvertFloatToHalf(t.y);
pDestination->z = XMConvertFloatToHalf(t.z);
pDestination->w = XMConvertFloatToHalf(t.w);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
@ -1844,8 +1816,7 @@ inline void XM_CALLCONV PackedVector::XMStoreShortN4
__m128i vResulti = _mm_cvtps_epi32(vResult);
vResulti = _mm_packs_epi32(vResulti,vResulti);
_mm_store_sd(reinterpret_cast<double *>(&pDestination->x),_mm_castsi128_pd(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1893,8 +1864,7 @@ inline void XM_CALLCONV PackedVector::XMStoreShort4
// Pack the ints into shorts
vInt = _mm_packs_epi32(vInt,vInt);
_mm_store_sd(reinterpret_cast<double *>(&pDestination->x),_mm_castsi128_pd(vInt));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1943,8 +1913,7 @@ inline void XM_CALLCONV PackedVector::XMStoreUShortN4
pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
pDestination->z = static_cast<int16_t>(_mm_extract_epi16(vInt,4));
pDestination->w = static_cast<int16_t>(_mm_extract_epi16(vInt,6));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -1992,8 +1961,7 @@ inline void XM_CALLCONV PackedVector::XMStoreUShort4
pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
pDestination->z = static_cast<int16_t>(_mm_extract_epi16(vInt,4));
pDestination->w = static_cast<int16_t>(_mm_extract_epi16(vInt,6));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2045,8 +2013,7 @@ inline void XM_CALLCONV PackedVector::XMStoreXDecN4
vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,3,2,1));
vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult));
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2098,8 +2065,7 @@ inline void XM_CALLCONV PackedVector::XMStoreXDec4
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2149,8 +2115,7 @@ inline void XM_CALLCONV PackedVector::XMStoreUDecN4
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2226,8 +2191,7 @@ inline void XM_CALLCONV PackedVector::XMStoreUDec4
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2275,8 +2239,7 @@ inline void XM_CALLCONV PackedVector::XMStoreDecN4
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2326,8 +2289,7 @@ inline void XM_CALLCONV PackedVector::XMStoreDec4
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2378,8 +2340,7 @@ inline void XM_CALLCONV PackedVector::XMStoreUByteN4
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2430,8 +2391,7 @@ inline void XM_CALLCONV PackedVector::XMStoreUByte4
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2480,8 +2440,7 @@ inline void XM_CALLCONV PackedVector::XMStoreByteN4
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------
@ -2532,8 +2491,7 @@ inline void XM_CALLCONV PackedVector::XMStoreByte4
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
#endif
}
//------------------------------------------------------------------------------