1
0
mirror of https://github.com/microsoft/DirectXMath synced 2024-11-21 20:00:12 +00:00

Split SSE3 optimizations from SSE4

This commit is contained in:
Chuck Walbourn 2016-06-18 16:00:43 -07:00
parent 64306f6189
commit 55851275ba
2 changed files with 48 additions and 64 deletions

View File

@@ -51,11 +51,9 @@
#define _XM_F16C_INTRINSICS_
#endif
#ifdef _XM_F16C_INTRINSICS_
#ifndef _XM_AVX_INTRINSICS_
#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_AVX_INTRINSICS_)
#define _XM_AVX_INTRINSICS_
#endif
#endif // _XM_F16C_INTRINSICS_
#if !defined(_XM_AVX_INTRINSICS_) && defined(__AVX__) && !defined(_XM_NO_INTRINSICS_)
#define _XM_AVX_INTRINSICS_
@@ -65,7 +63,11 @@
#define _XM_SSE4_INTRINSICS_
#endif
#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_SSE3_INTRINSICS_)
#define _XM_SSE3_INTRINSICS_
#endif
#if defined(_XM_SSE3_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_)
#define _XM_SSE_INTRINSICS_
#endif
@@ -87,38 +89,37 @@
#include <malloc.h>
#pragma warning(pop)
#if defined(_XM_SSE_INTRINSICS_)
#ifndef _XM_NO_INTRINSICS_
#include <xmmintrin.h>
#include <emmintrin.h>
#endif
#elif defined(_XM_ARM_NEON_INTRINSICS_)
#ifndef _XM_NO_INTRINSICS_
#pragma warning(push)
#pragma warning(disable : 4987)
// C4987: Off by default noise
#include <intrin.h>
#pragma warning(pop)
#ifdef _XM_SSE_INTRINSICS_
#include <xmmintrin.h>
#include <emmintrin.h>
#ifdef _XM_SSE3_INTRINSICS_
#include <pmmintrin.h>
#endif
#ifdef _XM_SSE4_INTRINSICS_
#include <smmintrin.h>
#endif
#ifdef _XM_AVX_INTRINSICS_
#include <immintrin.h>
#endif
#elif defined(_XM_ARM_NEON_INTRINSICS_)
#ifdef _M_ARM64
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif
#endif
#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#pragma warning(push)
#pragma warning(disable : 4987)
// C4987: Off by default noise
#include <intrin.h>
#pragma warning(pop)
#include <smmintrin.h>
#endif
#if defined(_XM_AVX_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#include <immintrin.h>
#endif
#endif // !_XM_NO_INTRINSICS_
#include <sal.h>
#include <assert.h>
@@ -1625,7 +1626,7 @@ template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t Swizz
// Specialized swizzles
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; }
#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#if defined(_XM_SSE3_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return _mm_moveldup_ps(V); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return _mm_movehdup_ps(V); }
#endif

View File

@@ -1981,54 +1981,37 @@ inline XMVECTOR XM_CALLCONV XMColorSRGBToRGB( FXMVECTOR srgb )
inline bool XMVerifyCPUSupport()
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#if defined(_XM_F16C_INTRINSICS_) || defined(_XM_AVX_INTRINSICS_)
int avxCPUInfo[4] = {-1};
__cpuid( avxCPUInfo, 0 );
int CPUInfo[4] = { -1 };
__cpuid(CPUInfo, 0);
if (CPUInfo[0] < 1)
return false;
if ( avxCPUInfo[0] < 1 )
return false;
__cpuid(avxCPUInfo, 1 );
__cpuid(CPUInfo, 1);
#ifdef _XM_F16C_INTRINSICS_
if ( (avxCPUInfo[2] & 0x38000000 ) != 0x38000000 )
return false; // No F16C/AVX/OSXSAVE support
#else
if ( (avxCPUInfo[2] & 0x18000000 ) != 0x18000000 )
return false; // No AVX/OSXSAVE support
if ((CPUInfo[2] & 0x38080001) != 0x38080001)
return false; // No F16C/AVX/OSXSAVE/SSE4.1/SSE3 support
#elif defined(_XM_AVX_INTRINSICS_)
if ((CPUInfo[2] & 0x18080001) != 0x18080001)
return false; // No AVX/OSXSAVE/SSE4.1/SSE3 support
#elif defined(_XM_SSE4_INTRINSICS_)
if ((CPUInfo[2] & 0x80001) != 0x80001)
return false; // No SSE3/SSE4.1 support
#elif defined(_XM_SSE3_INTRINSICS_)
if (!(CPUInfo[2] & 0x1))
return false; // No SSE3 support
#endif
#endif
#ifdef _XM_SSE4_INTRINSICS_
int CPUInfo[4] = {-1};
__cpuid( CPUInfo, 0 );
if ( CPUInfo[0] < 1 )
return false;
// The x64 processor model requires SSE2 support, but no harm in checking
if ((CPUInfo[3] & 0x6000000) != 0x6000000)
return false; // No SSE2/SSE support
__cpuid(CPUInfo, 1 );
if ( (CPUInfo[2] & 0x80001) != 0x80001 )
return false; // Missing SSE3 or SSE 4.1 support
#endif
#if defined(_M_X64)
// The X64 processor model requires SSE2 support
return true;
#elif defined(PF_XMMI_INSTRUCTIONS_AVAILABLE)
// Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail
// Detecting SSE2 on older versions of Windows would require using cpuid directly
return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) != 0 && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) != 0 );
#else
// If windows.h is not included, we return false (likely a false negative)
return false;
#endif
#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#ifdef PF_ARM_NEON_INSTRUCTIONS_AVAILABLE
return ( IsProcessorFeaturePresent( PF_ARM_NEON_INSTRUCTIONS_AVAILABLE ) != 0 );
#else
// If windows.h is not included, we return false (likely a false negative)
return false;
#endif
// ARM-NEON support is required for the Windows on ARM platform
return true;
#else
// No intrinsics path always supported
return true;
#endif
}