mirror of
https://github.com/microsoft/DirectXMath
synced 2024-11-21 20:00:12 +00:00
Split SSE3 optimizations from SSE4
This commit is contained in:
parent
64306f6189
commit
55851275ba
@ -51,11 +51,9 @@
|
||||
#define _XM_F16C_INTRINSICS_
|
||||
#endif
|
||||
|
||||
#ifdef _XM_F16C_INTRINSICS_
|
||||
#ifndef _XM_AVX_INTRINSICS_
|
||||
#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_AVX_INTRINSICS_)
|
||||
#define _XM_AVX_INTRINSICS_
|
||||
#endif
|
||||
#endif // _XM_F16C_INTRINSICS_
|
||||
|
||||
#if !defined(_XM_AVX_INTRINSICS_) && defined(__AVX__) && !defined(_XM_NO_INTRINSICS_)
|
||||
#define _XM_AVX_INTRINSICS_
|
||||
@ -65,7 +63,11 @@
|
||||
#define _XM_SSE4_INTRINSICS_
|
||||
#endif
|
||||
|
||||
#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_)
|
||||
#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_SSE3_INTRINSICS_)
|
||||
#define _XM_SSE3_INTRINSICS_
|
||||
#endif
|
||||
|
||||
#if defined(_XM_SSE3_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_)
|
||||
#define _XM_SSE_INTRINSICS_
|
||||
#endif
|
||||
|
||||
@ -87,38 +89,37 @@
|
||||
#include <malloc.h>
|
||||
#pragma warning(pop)
|
||||
|
||||
#if defined(_XM_SSE_INTRINSICS_)
|
||||
#ifndef _XM_NO_INTRINSICS_
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
#ifndef _XM_NO_INTRINSICS_
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4987)
|
||||
// C4987: Off by default noise
|
||||
#include <intrin.h>
|
||||
#pragma warning(pop)
|
||||
|
||||
#ifdef _XM_SSE_INTRINSICS_
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
#ifdef _XM_SSE3_INTRINSICS_
|
||||
#include <pmmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef _XM_SSE4_INTRINSICS_
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef _XM_AVX_INTRINSICS_
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_)
|
||||
#ifdef _M_ARM64
|
||||
#include <arm64_neon.h>
|
||||
#else
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4987)
|
||||
// C4987: Off by default noise
|
||||
#include <intrin.h>
|
||||
#pragma warning(pop)
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(_XM_AVX_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#endif // !_XM_NO_INTRINSICS_
|
||||
|
||||
#include <sal.h>
|
||||
#include <assert.h>
|
||||
@ -1625,7 +1626,7 @@ template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t Swizz
|
||||
// Specialized swizzles
|
||||
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; }
|
||||
|
||||
#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
|
||||
#if defined(_XM_SSE3_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
|
||||
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return _mm_moveldup_ps(V); }
|
||||
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return _mm_movehdup_ps(V); }
|
||||
#endif
|
||||
|
@ -1981,54 +1981,37 @@ inline XMVECTOR XM_CALLCONV XMColorSRGBToRGB( FXMVECTOR srgb )
|
||||
inline bool XMVerifyCPUSupport()
|
||||
{
|
||||
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
|
||||
#if defined(_XM_F16C_INTRINSICS_) || defined(_XM_AVX_INTRINSICS_)
|
||||
int avxCPUInfo[4] = {-1};
|
||||
__cpuid( avxCPUInfo, 0 );
|
||||
int CPUInfo[4] = { -1 };
|
||||
__cpuid(CPUInfo, 0);
|
||||
if (CPUInfo[0] < 1)
|
||||
return false;
|
||||
|
||||
if ( avxCPUInfo[0] < 1 )
|
||||
return false;
|
||||
|
||||
__cpuid(avxCPUInfo, 1 );
|
||||
__cpuid(CPUInfo, 1);
|
||||
|
||||
#ifdef _XM_F16C_INTRINSICS_
|
||||
if ( (avxCPUInfo[2] & 0x38000000 ) != 0x38000000 )
|
||||
return false; // No F16C/AVX/OSXSAVE support
|
||||
#else
|
||||
if ( (avxCPUInfo[2] & 0x18000000 ) != 0x18000000 )
|
||||
return false; // No AVX/OSXSAVE support
|
||||
if ((CPUInfo[2] & 0x38080001) != 0x38080001)
|
||||
return false; // No F16C/AVX/OSXSAVE/SSE4.1/SSE3 support
|
||||
#elif defined(_XM_AVX_INTRINSICS_)
|
||||
if ((CPUInfo[2] & 0x18080001) != 0x18080001)
|
||||
return false; // No AVX/OSXSAVE/SSE4.1/SSE3 support
|
||||
#elif defined(_XM_SSE4_INTRINSICS_)
|
||||
if ((CPUInfo[2] & 0x80001) != 0x80001)
|
||||
return false; // No SSE3/SSE4.1 support
|
||||
#elif defined(_XM_SSE3_INTRINSICS_)
|
||||
if (!(CPUInfo[2] & 0x1))
|
||||
return false; // No SSE3 support
|
||||
#endif
|
||||
#endif
|
||||
#ifdef _XM_SSE4_INTRINSICS_
|
||||
int CPUInfo[4] = {-1};
|
||||
__cpuid( CPUInfo, 0 );
|
||||
|
||||
if ( CPUInfo[0] < 1 )
|
||||
return false;
|
||||
// The x64 processor model requires SSE2 support, but no harm in checking
|
||||
if ((CPUInfo[3] & 0x6000000) != 0x6000000)
|
||||
return false; // No SSE2/SSE support
|
||||
|
||||
__cpuid(CPUInfo, 1 );
|
||||
|
||||
if ( (CPUInfo[2] & 0x80001) != 0x80001 )
|
||||
return false; // Missing SSE3 or SSE 4.1 support
|
||||
#endif
|
||||
#if defined(_M_X64)
|
||||
// The X64 processor model requires SSE2 support
|
||||
return true;
|
||||
#elif defined(PF_XMMI_INSTRUCTIONS_AVAILABLE)
|
||||
// Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail
|
||||
// Detecting SSE2 on older versions of Windows would require using cpuid directly
|
||||
return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) != 0 && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) != 0 );
|
||||
#else
|
||||
// If windows.h is not included, we return false (likely a false negative)
|
||||
return false;
|
||||
#endif
|
||||
#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
|
||||
#ifdef PF_ARM_NEON_INSTRUCTIONS_AVAILABLE
|
||||
return ( IsProcessorFeaturePresent( PF_ARM_NEON_INSTRUCTIONS_AVAILABLE ) != 0 );
|
||||
#else
|
||||
// If windows.h is not included, we return false (likely a false negative)
|
||||
return false;
|
||||
#endif
|
||||
// ARM-NEON support is required for the Windows on ARM platform
|
||||
return true;
|
||||
#else
|
||||
// No intrinsics path always supported
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user