From 55851275baff739df07df5a05ac541667d663094 Mon Sep 17 00:00:00 2001
From: Chuck Walbourn
Date: Sat, 18 Jun 2016 16:00:43 -0700
Subject: [PATCH] Split SSE3 optimizations from SSE4

---
 Inc/DirectXMath.h       | 51 +++++++++++++++++-----------------
 Inc/DirectXMathMisc.inl | 61 +++++++++++++++--------------------------
 2 files changed, 48 insertions(+), 64 deletions(-)

diff --git a/Inc/DirectXMath.h b/Inc/DirectXMath.h
index 40a0d8c..89de84a 100644
--- a/Inc/DirectXMath.h
+++ b/Inc/DirectXMath.h
@@ -51,11 +51,9 @@
 #define _XM_F16C_INTRINSICS_
 #endif

-#ifdef _XM_F16C_INTRINSICS_
-#ifndef _XM_AVX_INTRINSICS_
+#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_AVX_INTRINSICS_)
 #define _XM_AVX_INTRINSICS_
 #endif
-#endif // _XM_F16C_INTRINSICS_

 #if !defined(_XM_AVX_INTRINSICS_) && defined(__AVX__) && !defined(_XM_NO_INTRINSICS_)
 #define _XM_AVX_INTRINSICS_
@@ -65,7 +63,11 @@
 #define _XM_SSE4_INTRINSICS_
 #endif

-#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_)
+#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_SSE3_INTRINSICS_)
+#define _XM_SSE3_INTRINSICS_
+#endif
+
+#if defined(_XM_SSE3_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_)
 #define _XM_SSE_INTRINSICS_
 #endif

@@ -87,38 +89,37 @@
 #include <malloc.h>
 #pragma warning(pop)

-#if defined(_XM_SSE_INTRINSICS_)
-#ifndef _XM_NO_INTRINSICS_
-#include <xmmintrin.h>
-#include <emmintrin.h>
-#endif
-#elif defined(_XM_ARM_NEON_INTRINSICS_)
 #ifndef _XM_NO_INTRINSICS_
 #pragma warning(push)
 #pragma warning(disable : 4987)
 // C4987: Off by default noise
 #include <intrin.h>
 #pragma warning(pop)
+
+#ifdef _XM_SSE_INTRINSICS_
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#ifdef _XM_SSE3_INTRINSICS_
+#include <pmmintrin.h>
+#endif
+
+#ifdef _XM_SSE4_INTRINSICS_
+#include <smmintrin.h>
+#endif
+
+#ifdef _XM_AVX_INTRINSICS_
+#include <immintrin.h>
+#endif
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
 #ifdef _M_ARM64
 #include <arm64_neon.h>
 #else
 #include <arm_neon.h>
 #endif
 #endif
-#endif
-
-#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
-#pragma warning(push)
-#pragma warning(disable : 4987)
-// C4987: Off by default noise
-#include <intrin.h>
-#pragma warning(pop)
-#include <smmintrin.h>
-#endif
-
-#if defined(_XM_AVX_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
-#include <immintrin.h>
-#endif
+#endif // !_XM_NO_INTRINSICS_

 #include <sal.h>
 #include <assert.h>
@@ -1625,7 +1626,7 @@ template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t Swizz
 // Specialized swizzles
 template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; }

-#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+#if defined(_XM_SSE3_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
 template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return _mm_moveldup_ps(V); }
 template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return _mm_movehdup_ps(V); }
 #endif
diff --git a/Inc/DirectXMathMisc.inl b/Inc/DirectXMathMisc.inl
index 653dfd9..ab93620 100644
--- a/Inc/DirectXMathMisc.inl
+++ b/Inc/DirectXMathMisc.inl
@@ -1981,54 +1981,37 @@ inline XMVECTOR XM_CALLCONV XMColorSRGBToRGB( FXMVECTOR srgb )
 inline bool XMVerifyCPUSupport()
 {
 #if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
-#if defined(_XM_F16C_INTRINSICS_) || defined(_XM_AVX_INTRINSICS_)
-    int avxCPUInfo[4] = {-1};
-    __cpuid( avxCPUInfo, 0 );
+    int CPUInfo[4] = { -1 };
+    __cpuid(CPUInfo, 0);
+    if (CPUInfo[0] < 1)
+        return false;

-    if ( avxCPUInfo[0] < 1 )
-        return false;
-
-    __cpuid(avxCPUInfo, 1 );
+    __cpuid(CPUInfo, 1);

 #ifdef _XM_F16C_INTRINSICS_
-    if ( (avxCPUInfo[2] & 0x38000000 ) != 0x38000000 )
-        return false; // No F16C/AVX/OSXSAVE support
-#else
-    if ( (avxCPUInfo[2] & 0x18000000 ) != 0x18000000 )
-        return false; // No AVX/OSXSAVE support
+    if ((CPUInfo[2] & 0x38080001) != 0x38080001)
+        return false; // No F16C/AVX/OSXSAVE/SSE4.1/SSE3 support
+#elif defined(_XM_AVX_INTRINSICS_)
+    if ((CPUInfo[2] & 0x18080001) != 0x18080001)
+        return false; // No AVX/OSXSAVE/SSE4.1/SSE3 support
+#elif defined(_XM_SSE4_INTRINSICS_)
+    if ((CPUInfo[2] & 0x80001) != 0x80001)
+        return false; // No SSE3/SSE4.1 support
+#elif defined(_XM_SSE3_INTRINSICS_)
+    if (!(CPUInfo[2] & 0x1))
+        return false; // No SSE3 support
 #endif
-#endif

-#ifdef _XM_SSE4_INTRINSICS_
-    int CPUInfo[4] = {-1};
-    __cpuid( CPUInfo, 0 );
-    if ( CPUInfo[0] < 1 )
-        return false;
+    // The x64 processor model requires SSE2 support, but no harm in checking
+    if ((CPUInfo[3] & 0x6000000) != 0x6000000)
+        return false; // No SSE2/SSE support

-    __cpuid(CPUInfo, 1 );
-
-    if ( (CPUInfo[2] & 0x80001) != 0x80001 )
-        return false; // Missing SSE3 or SSE 4.1 support
-#endif
-#if defined(_M_X64)
-    // The X64 processor model requires SSE2 support
     return true;
-#elif defined(PF_XMMI_INSTRUCTIONS_AVAILABLE)
-    // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail
-    // Detecting SSE2 on older versions of Windows would require using cpuid directly
-    return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) != 0 && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) != 0 );
-#else
-    // If windows.h is not included, we return false (likely a false negative)
-    return false;
-#endif
 #elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
-#ifdef PF_ARM_NEON_INSTRUCTIONS_AVAILABLE
-    return ( IsProcessorFeaturePresent( PF_ARM_NEON_INSTRUCTIONS_AVAILABLE ) != 0 );
-#else
-    // If windows.h is not included, we return false (likely a false negative)
-    return false;
-#endif
+    // ARM-NEON support is required for the Windows on ARM platform
+    return true;
 #else
+    // No intrinsics path always supported
     return true;
 #endif
 }