From 0d821781da4ed0f20cad4e30e618afab45a08f0f Mon Sep 17 00:00:00 2001
From: Chuck Walbourn <walbourn@users.noreply.github.com>
Date: Tue, 15 Oct 2024 15:21:59 -0700
Subject: [PATCH] October 2024 (#214)

---
 CHANGELOG.md      |  6 ++++++
 CMakeLists.txt    |  2 +-
 Inc/DirectXMath.h | 10 +++++-----
 README.md         |  2 +-
 XDSP/XDSP.h       | 30 ++++++++++++++++--------------
 5 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f67ce2c..5f2259e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,12 @@ Release available for download on [GitHub](https://github.com/microsoft/DirectXM
 
 ## Release History
 
+### October 2024 (3.20)
+* Fixed close-to-zero bug in the implementation of `TriangleTests::Intersects`
+* Renamed implementation namespace from `DirectX::Internal` to `DirectX::MathInternal` to avoid some conformance issues with other libraries
+* CMake project updates including support for ARM64EC
+* Added GitHub Actions YAML files
+
 ### February 2024 (3.19)
 * Fix to address MinGW issue with ``__cpuid`` in cpuid.h vs. intrin.h
 * Additional updates for clang/LLVM and GNUC
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 16a407c..d10adc1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@
 
 cmake_minimum_required (VERSION 3.20)
 
-set(DIRECTXMATH_VERSION 3.19)
+set(DIRECTXMATH_VERSION 3.20)
 
 project(DirectXMath
   VERSION ${DIRECTXMATH_VERSION}
diff --git a/Inc/DirectXMath.h b/Inc/DirectXMath.h
index 47c01b2..dc9f565 100644
--- a/Inc/DirectXMath.h
+++ b/Inc/DirectXMath.h
@@ -13,7 +13,7 @@
 #error DirectX Math requires C++
 #endif
 
-#define DIRECTX_MATH_VERSION 319
+#define DIRECTX_MATH_VERSION 320
 
 #if defined(_MSC_VER) && (_MSC_VER < 1910)
 #error DirectX Math requires Visual C++ 2017 or later.
@@ -338,7 +338,7 @@ namespace DirectX
      *
      ****************************************************************************/
 
-#ifdef _MSC_VER    
+#ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable:4068 4201 4365 4324 4820)
      // C4068: ignore unknown pragmas
@@ -2156,7 +2156,7 @@ namespace DirectX
      *
      ****************************************************************************/
 
-#ifdef _MSC_VER    
+#ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable:4068 4214 4204 4365 4616 4640 6001 6101)
      // C4068/4616: ignore unknown pragmas
@@ -2164,7 +2164,7 @@ namespace DirectX
      // C4365/4640: Off by default noise
      // C6001/6101: False positives
 #endif
-    
+
 #ifdef _PREFAST_
 #pragma prefast(push)
 #pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
@@ -2281,7 +2281,7 @@ namespace DirectX
 #ifdef _PREFAST_
 #pragma prefast(pop)
 #endif
-#ifdef _MSC_VER    
+#ifdef _MSC_VER
 #pragma warning(pop)
 #endif
 
diff --git a/README.md b/README.md
index d1f3238..1c1e81f 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ https://github.com/Microsoft/DirectXMath
 
 Copyright (c) Microsoft Corporation.
 
-**February 2024**
+**October 2024**
 
 This package contains the DirectXMath library, an all inline SIMD C++ linear algebra library for use in games and graphics apps.
 
diff --git a/XDSP/XDSP.h b/XDSP/XDSP.h
index b8c2f0e..3e07b89 100644
--- a/XDSP/XDSP.h
+++ b/XDSP/XDSP.h
@@ -41,7 +41,7 @@ namespace XDSP
     using CXMVECTOR = DirectX::CXMVECTOR;
     using XMFLOAT4A = DirectX::XMFLOAT4A;
 
-    inline bool ISPOWEROF2(size_t n) { return (((n)&((n)-1)) == 0 && (n) != 0); }
+    constexpr bool ISPOWEROF2(size_t n) { return (((n)&((n)-1)) == 0 && (n) != 0); }
 
     // Parallel multiplication of four complex numbers, assuming real and imaginary values are stored in separate vectors.
     inline void XM_CALLCONV vmulComplex(
@@ -457,42 +457,44 @@ namespace XDSP
         // pUnityTable[0 to uLength*4-1] contains real components for current FFT length
         // pUnityTable[uLength*4 to uLength*8-1] contains imaginary components for current FFT length
         static const XMVECTORF32 vXM0123 = { { { 0.0f, 1.0f, 2.0f, 3.0f } } };
-        uLength >>= 2;
-        XMVECTOR vlStep = XMVectorReplicate(XM_PIDIV2 / float(uLength));
+
+        size_t len = uLength;
+        len >>= 2;
+        XMVECTOR vlStep = XMVectorReplicate(XM_PIDIV2 / float(len));
         do
         {
-            uLength >>= 2;
+            len >>= 2;
             XMVECTOR vJP = vXM0123;
-            for (size_t j = 0; j < uLength; ++j)
+            for (size_t j = 0; j < len; ++j)
             {
                 XMVECTOR vSin, vCos;
                 XMVECTOR viJP, vlS;
 
                 pUnityTable[j] = g_XMOne;
-                pUnityTable[j + uLength * 4] = XMVectorZero();
+                pUnityTable[j + len * 4] = XMVectorZero();
 
                 vlS = XMVectorMultiply(vJP, vlStep);
                 XMVectorSinCos(&vSin, &vCos, vlS);
-                pUnityTable[j + uLength] = vCos;
-                pUnityTable[j + uLength * 5] = XMVectorMultiply(vSin, g_XMNegativeOne);
+                pUnityTable[j + len] = vCos;
+                pUnityTable[j + len * 5] = XMVectorMultiply(vSin, g_XMNegativeOne);
 
                 viJP = XMVectorAdd(vJP, vJP);
                 vlS = XMVectorMultiply(viJP, vlStep);
                 XMVectorSinCos(&vSin, &vCos, vlS);
-                pUnityTable[j + uLength * 2] = vCos;
-                pUnityTable[j + uLength * 6] = XMVectorMultiply(vSin, g_XMNegativeOne);
+                pUnityTable[j + len * 2] = vCos;
+                pUnityTable[j + len * 6] = XMVectorMultiply(vSin, g_XMNegativeOne);
 
                 viJP = XMVectorAdd(viJP, vJP);
                 vlS = XMVectorMultiply(viJP, vlStep);
                 XMVectorSinCos(&vSin, &vCos, vlS);
-                pUnityTable[j + uLength * 3] = vCos;
-                pUnityTable[j + uLength * 7] = XMVectorMultiply(vSin, g_XMNegativeOne);
+                pUnityTable[j + len * 3] = vCos;
+                pUnityTable[j + len * 7] = XMVectorMultiply(vSin, g_XMNegativeOne);
 
                 vJP = XMVectorAdd(vJP, g_XMFour);
             }
             vlStep = XMVectorMultiply(vlStep, g_XMFour);
-            pUnityTable += uLength * 8;
-        } while (uLength > 4);
+            pUnityTable += len * 8;
+        } while (len > 4);
     }
 
     //----------------------------------------------------------------------------------