From c592223086ae7917d844e4348953ee179e2fe35e Mon Sep 17 00:00:00 2001
From: Chuck Walbourn <chuckw@windows.microsoft.com>
Date: Wed, 21 Jun 2017 13:32:45 -0700
Subject: [PATCH] June 2017 (3.11) release

---
 Inc/DirectXMath.h         |  2 +-
 Inc/DirectXMathMatrix.inl | 16 ++++++++--------
 ReadMe.txt                |  8 +++++++-
 3 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/Inc/DirectXMath.h b/Inc/DirectXMath.h
index 39581f5..6e74ab1 100644
--- a/Inc/DirectXMath.h
+++ b/Inc/DirectXMath.h
@@ -17,7 +17,7 @@
 #error DirectX Math requires C++
 #endif
 
-#define DIRECTX_MATH_VERSION 310
+#define DIRECTX_MATH_VERSION 311
 
 #if defined(_MSC_VER) && (_MSC_VER < 1800)
 #error DirectX Math Visual C++ 2013 or later.
diff --git a/Inc/DirectXMathMatrix.inl b/Inc/DirectXMathMatrix.inl
index b474bf4..95e2c48 100644
--- a/Inc/DirectXMathMatrix.inl
+++ b/Inc/DirectXMathMatrix.inl
@@ -298,7 +298,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply
 #elif defined(_XM_SSE_INTRINSICS_)
     XMMATRIX mResult;
     // Splat the component X,Y,Z then W
-#if defined(_XM_AVX_INTRINSICS_)
+#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
     XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 0);
     XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 1);
     XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 2);
@@ -322,7 +322,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply
     vX = _mm_add_ps(vX,vY);
     mResult.r[0] = vX;
     // Repeat for the other 3 rows
-#if defined(_XM_AVX_INTRINSICS_)
+#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
     vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 0);
     vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 1);
     vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 2);
@@ -342,7 +342,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply
     vY = _mm_add_ps(vY,vW);
     vX = _mm_add_ps(vX,vY);
     mResult.r[1] = vX;
-#if defined(_XM_AVX_INTRINSICS_)
+#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
     vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 0);
     vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 1);
     vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 2);
@@ -362,7 +362,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply
     vY = _mm_add_ps(vY,vW);
     vX = _mm_add_ps(vX,vY);
     mResult.r[2] = vX;
-#if defined(_XM_AVX_INTRINSICS_)
+#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
     vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 0);
     vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 1);
     vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 2);
@@ -479,7 +479,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
     return mResult;
 #elif defined(_XM_SSE_INTRINSICS_)
     // Splat the component X,Y,Z then W
-#if defined(_XM_AVX_INTRINSICS_)
+#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
     XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 0);
     XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 1);
     XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 2);
@@ -503,7 +503,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
     vX = _mm_add_ps(vX,vY);
     XMVECTOR r0 = vX;
     // Repeat for the other 3 rows
-#if defined(_XM_AVX_INTRINSICS_)
+#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
     vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 0);
     vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 1);
     vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 2);
@@ -523,7 +523,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
     vY = _mm_add_ps(vY,vW);
     vX = _mm_add_ps(vX,vY);
     XMVECTOR r1 = vX;
-#if defined(_XM_AVX_INTRINSICS_)
+#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
     vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 0);
     vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 1);
     vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 2);
@@ -543,7 +543,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
     vY = _mm_add_ps(vY,vW);
     vX = _mm_add_ps(vX,vY);
     XMVECTOR r2 = vX;
-#if defined(_XM_AVX_INTRINSICS_)
+#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
     vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 0);
     vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 1);
     vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 2);
diff --git a/ReadMe.txt b/ReadMe.txt
index e867d00..90b4953 100644
--- a/ReadMe.txt
+++ b/ReadMe.txt
@@ -4,7 +4,7 @@ DirectXMath
 
 Copyright (c) Microsoft Corporation. All rights reserved.
 
-January 2017
+June 2017
 
 This package contains the DirectXMath library, an all inline SIMD C++ linear algebra library
 for use in games and graphics apps
@@ -59,6 +59,12 @@ https://opensource.microsoft.com/codeofconduct/
 RELEASE HISTORY
 ---------------
 
+June 2017 (3.11)
+    AVX optimization of XMMatrixMultiply and XMMatrixMultiplyTranspose
+    AVX2 optimization of XMVectorSplatX
+    FMA3 optimization of XMVectorMultiplyAdd and XMVectorNegativeMultiplySubtract (implied by /arch:AVX2)
+    Conformance fixes to support compilation with Clang 3.7
+
 January 2017 (3.10)
     Added XMVectorSum for horizontal adds
     ARMv8 intrinsics use for ARM64 platform (division, rounding, half-precision conversion)