From c592223086ae7917d844e4348953ee179e2fe35e Mon Sep 17 00:00:00 2001 From: Chuck Walbourn Date: Wed, 21 Jun 2017 13:32:45 -0700 Subject: [PATCH] June 2017 --- Inc/DirectXMath.h | 2 +- Inc/DirectXMathMatrix.inl | 16 ++++++++-------- ReadMe.txt | 8 +++++++- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/Inc/DirectXMath.h b/Inc/DirectXMath.h index 39581f5..6e74ab1 100644 --- a/Inc/DirectXMath.h +++ b/Inc/DirectXMath.h @@ -17,7 +17,7 @@ #error DirectX Math requires C++ #endif -#define DIRECTX_MATH_VERSION 310 +#define DIRECTX_MATH_VERSION 311 #if defined(_MSC_VER) && (_MSC_VER < 1800) #error DirectX Math Visual C++ 2013 or later. diff --git a/Inc/DirectXMathMatrix.inl b/Inc/DirectXMathMatrix.inl index b474bf4..95e2c48 100644 --- a/Inc/DirectXMathMatrix.inl +++ b/Inc/DirectXMathMatrix.inl @@ -298,7 +298,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply #elif defined(_XM_SSE_INTRINSICS_) XMMATRIX mResult; // Splat the component X,Y,Z then W -#if defined(_XM_AVX_INTRINSICS_) +#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800)) XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 0); XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 1); XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 2); @@ -322,7 +322,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply vX = _mm_add_ps(vX,vY); mResult.r[0] = vX; // Repeat for the other 3 rows -#if defined(_XM_AVX_INTRINSICS_) +#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800)) vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 0); vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 1); vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 2); @@ -342,7 +342,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply vY = _mm_add_ps(vY,vW); vX = _mm_add_ps(vX,vY); mResult.r[1] = vX; -#if defined(_XM_AVX_INTRINSICS_) +#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800)) vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 0); vY = 
_mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 1); vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 2); @@ -362,7 +362,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply vY = _mm_add_ps(vY,vW); vX = _mm_add_ps(vX,vY); mResult.r[2] = vX; -#if defined(_XM_AVX_INTRINSICS_) +#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800)) vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 0); vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 1); vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 2); @@ -479,7 +479,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose return mResult; #elif defined(_XM_SSE_INTRINSICS_) // Splat the component X,Y,Z then W -#if defined(_XM_AVX_INTRINSICS_) +#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800)) XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 0); XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 1); XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 2); @@ -503,7 +503,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose vX = _mm_add_ps(vX,vY); XMVECTOR r0 = vX; // Repeat for the other 3 rows -#if defined(_XM_AVX_INTRINSICS_) +#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800)) vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 0); vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 1); vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 2); @@ -523,7 +523,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose vY = _mm_add_ps(vY,vW); vX = _mm_add_ps(vX,vY); XMVECTOR r1 = vX; -#if defined(_XM_AVX_INTRINSICS_) +#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800)) vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 0); vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 1); vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 2); @@ -543,7 +543,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose vY = _mm_add_ps(vY,vW); vX = _mm_add_ps(vX,vY); XMVECTOR r2 = vX; -#if 
defined(_XM_AVX_INTRINSICS_) +#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800)) vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 0); vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 1); vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 2); diff --git a/ReadMe.txt b/ReadMe.txt index e867d00..90b4953 100644 --- a/ReadMe.txt +++ b/ReadMe.txt @@ -4,7 +4,7 @@ DirectXMath Copyright (c) Microsoft Corporation. All rights reserved. -January 2017 +June 2017 This package contains the DirectXMath library, an all inline SIMD C++ linear algebra library for use in games and graphics apps @@ -59,6 +59,12 @@ https://opensource.microsoft.com/codeofconduct/ RELEASE HISTORY --------------- +June 2017 (3.11) + AVX optimization of XMMatrixMultiply and XMMatrixMultiplyTranspose + AVX2 optimization for XMVectorSplatX + FMA3 optimization of XMVectorMultiplyAdd and XMVectorNegativeMultiplySubtract (implied by /arch:AVX2) + Conformance fixes to support compilation with Clang 3.7 + January 2017 (3.10) Added XMVectorSum for horizontal adds ARMv8 intrinsics use for ARM64 platform (division, rounding, half-precision conversion)