DirectXMath 3.06

2024-11-25 21:40:06 +00:00 · 2016-05-23 14:29:47 -07:00 · 2016-05-23 14:29:47 -07:00 · aee6e900f0
commit aee6e900f0
parent fb43a89861
5 changed files with 128 additions and 166 deletions
--- a/Inc/DirectXMath.h
+++ b/Inc/DirectXMath.h
@ -17,7 +17,7 @@
 #error DirectX Math requires C++
 #endif
-#define DIRECTX_MATH_VERSION 305
+#define DIRECTX_MATH_VERSION 306
 #if !defined(_XM_BIGENDIAN_) && !defined(_XM_LITTLEENDIAN_)
 #if defined(_M_X64) || defined(_M_IX86) || defined(_M_ARM)
@ -29,6 +29,7 @@
 #endif
 #endif // !_XM_BIGENDIAN_ && !_XM_LITTLEENDIAN_
 #if defined(_MSC_VER) && !defined(_M_ARM) && (!_MANAGED) && (!_M_CEE) && (!defined(_M_IX86_FP) || (_M_IX86_FP > 1)) && !defined(_XM_NO_INTRINSICS_) && !defined(_XM_VECTORCALL_)
 #if ((_MSC_FULL_VER >= 170065501) && (_MSC_VER < 1800)) || (_MSC_FULL_VER >= 180020418)
 #define _XM_VECTORCALL_ 1
@ -279,8 +280,8 @@ typedef const XMVECTOR FXMVECTOR;
 typedef const XMVECTOR& FXMVECTOR;
 #endif
-// Fix-up for (4th) XMVECTOR parameter to pass in-register for ARM, Xbox 360, and vector call; by reference otherwise
+// Fix-up for (4th) XMVECTOR parameter to pass in-register for ARM, Xbox 360, and x64 vector call; by reference otherwise
-#if ( defined(_M_ARM) || defined(_XM_VMX128_INTRINSICS_) || _XM_VECTORCALL_ ) && !defined(_XM_NO_INTRINSICS_)
+#if ( defined(_M_ARM) || defined(_XM_VMX128_INTRINSICS_) || (_XM_VECTORCALL_ && !defined(_M_IX86) ) ) && !defined(_XM_NO_INTRINSICS_)
 typedef const XMVECTOR GXMVECTOR;
 #else
 typedef const XMVECTOR& GXMVECTOR;
@ -1392,6 +1393,10 @@ XMVECTOR    XM_CALLCONV     XMColorXYZToRGB( FXMVECTOR xyz );
 XMVECTOR    XM_CALLCONV     XMColorXYZToSRGB( FXMVECTOR xyz );
 XMVECTOR    XM_CALLCONV     XMColorSRGBToXYZ( FXMVECTOR srgb );
 XMVECTOR    XM_CALLCONV     XMColorRGBToSRGB( FXMVECTOR rgb );
 XMVECTOR    XM_CALLCONV     XMColorSRGBToRGB( FXMVECTOR srgb );
 /****************************************************************************
 *
 * Miscellaneous operations
--- a/Inc/DirectXMathMisc.inl
+++ b/Inc/DirectXMathMisc.inl
@ -1985,6 +1985,42 @@ inline XMVECTOR XM_CALLCONV XMColorSRGBToXYZ( FXMVECTOR srgb )
    return XMVectorSelect( srgb, clr, g_XMSelect1110 );
 }
 //------------------------------------------------------------------------------
 // Encodes the x/y/z lanes of a colour with the piecewise sRGB curve:
 //   c <  0.0031308 : 12.92 * c
 //   otherwise      : 1.055 * c^(1/2.4) - 0.055
 // The input is saturated first. The final select takes its first operand
 // from the untouched input `rgb`; presumably g_XMSelect1110 keeps the
 // converted x/y/z and passes the original w through -- confirm against the
 // constant's definition.
 inline XMVECTOR XM_CALLCONV XMColorRGBToSRGB( FXMVECTOR rgb )
 {
    // The 4th (w) component of each constant is a neutral 1 or 0.
    static const XMVECTORF32 Threshold   = { 0.0031308f, 0.0031308f, 0.0031308f, 1.f };
    static const XMVECTORF32 LinearGain  = { 12.92f, 12.92f, 12.92f, 1.f };
    static const XMVECTORF32 CurveGain   = { 1.055f, 1.055f, 1.055f, 1.f };
    static const XMVECTORF32 CurveOffset = { 0.055f, 0.055f, 0.055f, 0.f };
    static const XMVECTORF32 Exponent    = { 1.0f/2.4f, 1.0f/2.4f, 1.0f/2.4f, 1.f };

    const XMVECTOR clamped = XMVectorSaturate(rgb);

    // Per-lane comparison result marking lanes that fall on the linear toe.
    const XMVECTOR onLinearToe = XMVectorLess( clamped, Threshold );

    // Evaluate both branches for every lane, then pick per lane.
    const XMVECTOR linearBranch = XMVectorMultiply( clamped, LinearGain );
    const XMVECTOR raised = XMVectorPow( clamped, Exponent );
    const XMVECTOR curveBranch = CurveGain * raised - CurveOffset;
    const XMVECTOR encoded = XMVectorSelect( curveBranch, linearBranch, onLinearToe );

    return XMVectorSelect( rgb, encoded, g_XMSelect1110 );
 }
 //------------------------------------------------------------------------------
 // Decodes the x/y/z lanes of an sRGB-encoded colour back to linear:
 //   c >  0.04045 : ((c + 0.055) / 1.055)^2.4
 //   otherwise    : c / 12.92
 // The input is saturated first. The final select takes its first operand
 // from the untouched input `srgb`; presumably g_XMSelect1110 keeps the
 // converted x/y/z and passes the original w through -- confirm against the
 // constant's definition.
 inline XMVECTOR XM_CALLCONV XMColorSRGBToRGB( FXMVECTOR srgb )
 {
    // The 4th (w) component of each constant is a neutral 1 or 0.
    static const XMVECTORF32 Threshold    = { 0.04045f, 0.04045f, 0.04045f, 1.f };
    static const XMVECTORF32 InvLinear    = { 1.f/12.92f, 1.f/12.92f, 1.f/12.92f, 1.f };
    static const XMVECTORF32 InvCurveGain = { 1.f/1.055f, 1.f/1.055f, 1.f/1.055f, 1.f };
    static const XMVECTORF32 CurveOffset  = { 0.055f, 0.055f, 0.055f, 0.f };
    static const XMVECTORF32 Exponent     = { 2.4f, 2.4f, 2.4f, 1.f };

    const XMVECTOR clamped = XMVectorSaturate(srgb);

    // Per-lane comparison result marking lanes that lie above the linear toe.
    const XMVECTOR onCurve = XMVectorGreater( clamped, Threshold );

    // Evaluate both branches for every lane, then pick per lane.
    const XMVECTOR linearBranch = XMVectorMultiply( clamped, InvLinear );
    const XMVECTOR rescaled = (clamped + CurveOffset) * InvCurveGain;
    const XMVECTOR curveBranch = XMVectorPow( rescaled, Exponent );
    const XMVECTOR decoded = XMVectorSelect( linearBranch, curveBranch, onCurve );

    return XMVectorSelect( srgb, decoded, g_XMSelect1110 );
 }
 /****************************************************************************
 *
 * Miscellaneous
--- a/Inc/DirectXMathVector.inl
+++ b/Inc/DirectXMathVector.inl
@ -2323,7 +2323,6 @@ inline XMVECTOR XM_CALLCONV XMVectorMax
 //------------------------------------------------------------------------------
 #if defined(_XM_NO_INTRINSICS_)
 namespace Internal
 {
    inline float round_to_nearest( float x )
@ -2345,7 +2344,8 @@ namespace Internal
        return i + 1.f;
    }
 };
-#else
+
 #if !defined(_XM_NO_INTRINSICS_)
 #pragma float_control(push)
 #pragma float_control(precise, on)
 #endif
--- a/Inc/DirectXPackedVector.h
+++ b/Inc/DirectXPackedVector.h
@ -921,6 +921,7 @@ XMVECTOR    XM_CALLCONV     XMLoadXDec4(_In_ const XMXDEC4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadDecN4(_In_ const XMDECN4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadDec4(_In_ const XMDEC4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadUDecN4(_In_ const XMUDECN4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadUDecN4_XR(_In_ const XMUDECN4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadUDec4(_In_ const XMUDEC4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadByteN4(_In_ const XMBYTEN4* pSource);
 XMVECTOR    XM_CALLCONV     XMLoadByte4(_In_ const XMBYTE4* pSource);
@ -962,6 +963,7 @@ void    XM_CALLCONV     XMStoreXDec4(_Out_ XMXDEC4* pDestination, _In_ FXMVECTOR
 void    XM_CALLCONV     XMStoreDecN4(_Out_ XMDECN4* pDestination, _In_ FXMVECTOR V);
 void    XM_CALLCONV     XMStoreDec4(_Out_ XMDEC4* pDestination, _In_ FXMVECTOR V);
 void    XM_CALLCONV     XMStoreUDecN4(_Out_ XMUDECN4* pDestination, _In_ FXMVECTOR V);
 void    XM_CALLCONV     XMStoreUDecN4_XR(_Out_ XMUDECN4* pDestination, _In_ FXMVECTOR V);
 void    XM_CALLCONV     XMStoreUDec4(_Out_ XMUDEC4* pDestination, _In_ FXMVECTOR V);
 void    XM_CALLCONV     XMStoreByteN4(_Out_ XMBYTEN4* pDestination, _In_ FXMVECTOR V);
 void    XM_CALLCONV     XMStoreByte4(_Out_ XMBYTE4* pDestination, _In_ FXMVECTOR V);
--- a/Inc/DirectXPackedVector.inl
+++ b/Inc/DirectXPackedVector.inl
@ -605,104 +605,16 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadFloat3SE
 {
    assert(pSource);
-    __declspec(align(16)) uint32_t Result[4];
+    union { float f; int32_t i; } fi;
-    uint32_t Mantissa;
+    fi.i = 0x33800000 + (pSource->e << 23);
-    uint32_t Exponent, ExpBits;
+    float Scale = fi.f;
-    if ( pSource->e == 0x1f ) // INF or NAN
+    XMVECTORF32 v = {
-    {
+        Scale * float( pSource->xm ),
-        Result[0] = 0x7f800000 | (pSource->xm << 14);
+        Scale * float( pSource->ym ),
-        Result[1] = 0x7f800000 | (pSource->ym << 14);
+        Scale * float( pSource->zm ),
-        Result[2] = 0x7f800000 | (pSource->zm << 14);
+        1.0f };
-    }
+    return v;
    else if ( pSource->e != 0 ) // The values are all normalized
    {
        Exponent = pSource->e;
        ExpBits = (Exponent + 112) << 23;
        Mantissa = pSource->xm;
        Result[0] = ExpBits | (Mantissa << 14);
        Mantissa = pSource->ym;
        Result[1] = ExpBits | (Mantissa << 14);
        Mantissa = pSource->zm;
        Result[2] = ExpBits | (Mantissa << 14);
    }
    else
    {
        // X Channel
        Mantissa = pSource->xm;
        if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;
            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x200) == 0);
            Mantissa &= 0x1FF;
        }
        else // The value is zero
        {
            Exponent = (uint32_t)-112;
        }
        Result[0] = ((Exponent + 112) << 23) | (Mantissa << 14);
        // Y Channel
        Mantissa = pSource->ym;
        if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;
            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x200) == 0);
            Mantissa &= 0x1FF;
        }
        else // The value is zero
        {
            Exponent = (uint32_t)-112;
        }
        Result[1] = ((Exponent + 112) << 23) | (Mantissa << 14);
        // Z Channel
        Mantissa = pSource->zm;
        if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;
            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x200) == 0);
            Mantissa &= 0x1FF;
        }
        else // The value is zero
        {
            Exponent = (uint32_t)-112;
        }
        Result[2] = ((Exponent + 112) << 23) | (Mantissa << 14);
    }
    return XMLoadFloat3A( reinterpret_cast<const XMFLOAT3A*>(&Result) );
 }
 //------------------------------------------------------------------------------
@ -1012,6 +924,31 @@ inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUDecN4
 #endif // _XM_VMX128_INTRINSICS_
 }
 //------------------------------------------------------------------------------
 // Unpacks a 10:10:10:2 value into a float vector using a biased mapping for
 // the three 10-bit channels: result = (stored - 0x180) / 510, so a stored
 // 384 yields 0.0 and a stored 894 yields 1.0. The 2-bit top field is mapped
 // to w as stored / 3.
 // NOTE(review): the "_XR" suffix presumably refers to the extended-range
 // biased 10-bit layout -- confirm against the format documentation.
 _Use_decl_annotations_
 inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUDecN4_XR
 (
    const XMUDECN4* pSource
 )
 {
    assert(pSource);

    // Signed temporaries so that subtracting the bias can go negative.
    const int32_t rawX = pSource->v & 0x3FF;           // bits 0-9
    const int32_t rawY = (pSource->v >> 10) & 0x3FF;   // bits 10-19
    const int32_t rawZ = (pSource->v >> 20) & 0x3FF;   // bits 20-29

    const float x = static_cast<float>(rawX - 0x180) / 510.0f;
    const float y = static_cast<float>(rawY - 0x180) / 510.0f;
    const float z = static_cast<float>(rawZ - 0x180) / 510.0f;
    const float w = static_cast<float>(pSource->v >> 30) / 3.0f;   // bits 30-31

    XMVECTORF32 unpacked = { x, y, z, w };
    return unpacked.v;
 }
 //------------------------------------------------------------------------------
 _Use_decl_annotations_
 inline XMVECTOR XM_CALLCONV PackedVector::XMLoadUDec4
@ -1814,77 +1751,33 @@ inline void XM_CALLCONV PackedVector::XMStoreFloat3SE
 {
    assert(pDestination);
-    __declspec(align(16)) uint32_t IValue[4];
+    XMFLOAT3A tmp;
-    XMStoreFloat3A( reinterpret_cast<XMFLOAT3A*>(&IValue), V );
+    XMStoreFloat3A( &tmp, V );
-    uint32_t Exp[3];
+    static const float maxf9 = float(0x1FF << 7);
-    uint32_t Frac[3];
+    static const float minf9 = float(1.f / (1 << 16));
-    // X, Y, Z Channels (5-bit exponent, 9-bit mantissa)
+    float x = (tmp.x >= 0.f) ? ( (tmp.x > maxf9) ? maxf9 : tmp.x ) : 0.f;
-    for(uint32_t j=0; j < 3; ++j)
+    float y = (tmp.y >= 0.f) ? ( (tmp.y > maxf9) ? maxf9 : tmp.y ) : 0.f;
-    {
+    float z = (tmp.z >= 0.f) ? ( (tmp.z > maxf9) ? maxf9 : tmp.z ) : 0.f;
        uint32_t Sign = IValue[j] & 0x80000000;
        uint32_t I = IValue[j] & 0x7FFFFFFF;
-        if ((I & 0x7F800000) == 0x7F800000)
+    const float max_xy = (x > y) ? x : y;
-        {
+    const float max_xyz = (max_xy > z) ? max_xy : z;
            // INF or NAN
            Exp[j] = 0x1f;
            if (( I & 0x7FFFFF ) != 0)
            {
                Frac[j] = ((I>>14)|(I>>5)|(I))&0x1ff;
            }
            else if ( Sign )
            {
                // -INF is clamped to 0 since 3SE is positive only
                Exp[j] = Frac[j] = 0;
            }
        }
        else if ( Sign )
        {
            // 3SE is positive only, so clamp to zero
            Exp[j] = Frac[j] = 0;
        }
        else if (I > 0x477FC000U)
        {
            // The number is too large, set to max
            Exp[j] = 0x1e;
            Frac[j] = 0x1ff;
        }
        else
        {
            if (I < 0x38800000U)
            {
                // The number is too small to be represented as a normalized float9
                // Convert it to a denormalized value.
                uint32_t Shift = 113U - (I >> 23U);
                I = (0x800000U | (I & 0x7FFFFFU)) >> Shift;
            }
            else
            {
                // Rebias the exponent to represent the value as a normalized float9
                I += 0xC8000000U;
            }
-            uint32_t T = ((I + 0x1FFFU + ((I >> 14U) & 1U)) >> 14U)&0x3fffU;
+    const float maxColor = (max_xyz > minf9) ? max_xyz : minf9;
-            Exp[j] = (T & 0x3E00) >> 9;
+    union { float f; int32_t i; } fi;
-            Frac[j] = T & 0x1ff;
+    fi.f = maxColor;
-        }
+    fi.i &= 0xFF800000; // cut off fraction
    }
-    // Adjust to a shared exponent
+    pDestination->e = (fi.i - 0x37800000) >> 23;
    uint32_t T = XMMax( Exp[0], XMMax( Exp[1], Exp[2] ) );
-    Frac[0] = Frac[0] >> (T - Exp[0]);
+    fi.i = 0x83000000 - fi.i;
-    Frac[1] = Frac[1] >> (T - Exp[1]);
+    float ScaleR = fi.f;
    Frac[2] = Frac[2] >> (T - Exp[2]);
-    // Store packed into memory
+    pDestination->xm = static_cast<uint32_t>( Internal::round_to_nearest(x * ScaleR) );
-    pDestination->xm = Frac[0];
+    pDestination->ym = static_cast<uint32_t>( Internal::round_to_nearest(y * ScaleR) );
-    pDestination->ym = Frac[1];
+    pDestination->zm = static_cast<uint32_t>( Internal::round_to_nearest(z * ScaleR) );
    pDestination->zm = Frac[2];
    pDestination->e = T;
 }
 //------------------------------------------------------------------------------
@ -2260,6 +2153,32 @@ inline void XM_CALLCONV PackedVector::XMStoreUDecN4
 #endif // _XM_VMX128_INTRINSICS_
 }
 //------------------------------------------------------------------------------
 // Packs a float vector into a 10:10:10:2 value using the biased mapping
 // stored = V * 510 + 384 for x/y/z and stored = V * 3 for w. The results
 // are clamped between g_XMZero and {1023, 1023, 1023, 3} and then converted
 // to integers with a plain float-to-uint32_t cast (truncation, no rounding).
 // Bit layout written: x in bits 0-9, y in 10-19, z in 20-29, w in 30-31.
 _Use_decl_annotations_
 inline void XM_CALLCONV PackedVector::XMStoreUDecN4_XR
 (
    XMUDECN4* pDestination, 
    FXMVECTOR V
 )
 {
    assert(pDestination);

    static const XMVECTORF32  Gain    = { 510.0f, 510.0f, 510.0f, 3.0f };
    static const XMVECTORF32  Offset  = { 384.0f, 384.0f, 384.0f, 0.0f };
    static const XMVECTORF32  Ceiling = { 1023.f, 1023.f, 1023.f, 3.f };

    const XMVECTOR biased = XMVectorMultiplyAdd( V, Gain, Offset );
    const XMVECTOR bounded = XMVectorClamp( biased, g_XMZero, Ceiling );

    XMFLOAT4A lanes;
    XMStoreFloat4A( &lanes, bounded );

    // w is already limited to [0, 3] by the clamp, so it is shifted unmasked.
    const uint32_t wField = static_cast<uint32_t>(lanes.w) << 30;
    const uint32_t zField = (static_cast<uint32_t>(lanes.z) & 0x3FF) << 20;
    const uint32_t yField = (static_cast<uint32_t>(lanes.y) & 0x3FF) << 10;
    const uint32_t xField = static_cast<uint32_t>(lanes.x) & 0x3FF;

    pDestination->v = wField | zField | yField | xField;
 }
 //------------------------------------------------------------------------------
 _Use_decl_annotations_
 inline void XM_CALLCONV PackedVector::XMStoreUDec4