1
0
mirror of https://github.com/microsoft/DirectXMath synced 2024-09-18 22:29:53 +00:00

Normalize line endings

This commit is contained in:
Chuck Walbourn 2016-08-22 11:43:47 -07:00
parent 31497b9323
commit 7be36d19e3
24 changed files with 44282 additions and 44282 deletions

View File

@ -1,289 +1,289 @@
//-------------------------------------------------------------------------------------
// DirectXMathAVX.h -- AVX (version 1) extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error AVX not supported on ARM platform
#endif
#if defined(_MSC_VER) && (_MSC_VER < 1600)
#error AVX intrinsics requires Visual C++ 2010 Service Pack 1 or later.
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <immintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
namespace AVX
{
inline bool XMVerifyAVXSupport()
{
// Runtime check that both the CPU and the OS support AVX before using any AVX:: routine.
// Should return true for AMD Bulldozer, Intel "Sandy Bridge", and Intel "Ivy Bridge" or later processors
// with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
// Leaf 0 reports the highest supported standard CPUID leaf; bail out if leaf 1 is unavailable.
__cpuid( CPUInfo, 0 );
if ( CPUInfo[0] < 1 )
return false;
__cpuid(CPUInfo, 1 );
// We check ECX for AVX (bit 28), OSXSAVE (bit 27), SSE4.1 (bit 19), and SSE3 (bit 0)
return ( (CPUInfo[2] & 0x18080001) == 0x18080001 );
}
//-------------------------------------------------------------------------------------
// Vector
//-------------------------------------------------------------------------------------
// Broadcasts a single float loaded from memory into all four lanes (AVX vbroadcastss).
inline XMVECTOR XM_CALLCONV XMVectorReplicatePtr( _In_ const float *pValue )
{
return _mm_broadcast_ss( pValue );
}
// Replicates the X component of V into all four lanes (single vpermilps).
inline XMVECTOR XM_CALLCONV XMVectorSplatX( FXMVECTOR V )
{
return _mm_permute_ps( V, _MM_SHUFFLE(0, 0, 0, 0) );
}
// Replicates the Y component of V into all four lanes.
inline XMVECTOR XM_CALLCONV XMVectorSplatY( FXMVECTOR V )
{
return _mm_permute_ps( V, _MM_SHUFFLE(1, 1, 1, 1) );
}
// Replicates the Z component of V into all four lanes.
inline XMVECTOR XM_CALLCONV XMVectorSplatZ( FXMVECTOR V )
{
return _mm_permute_ps( V, _MM_SHUFFLE(2, 2, 2, 2) );
}
// Replicates the W component of V into all four lanes.
inline XMVECTOR XM_CALLCONV XMVectorSplatW( FXMVECTOR V )
{
return _mm_permute_ps( V, _MM_SHUFFLE(3, 3, 3, 3) );
}
// Reorders the components of V by the runtime indices E0..E3 (each must select a source lane 0-3).
inline XMVECTOR XM_CALLCONV XMVectorSwizzle( FXMVECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3 )
{
assert( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
_Analysis_assume_( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
unsigned int elem[4] = { E0, E1, E2, E3 };
// Unaligned load of the control indices; vpermilps selects each lane by a variable control.
__m128i vControl = _mm_loadu_si128( reinterpret_cast<const __m128i *>(&elem[0]) );
return _mm_permutevar_ps( V, vControl );
}
// Selects one element per lane from the pair {V1,V2}: indices 0-3 pick from V1, 4-7 from V2.
inline XMVECTOR XM_CALLCONV XMVectorPermute( FXMVECTOR V1, FXMVECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW )
{
assert( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
_Analysis_assume_( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
static const XMVECTORU32 three = { 3, 3, 3, 3 };
// Fixed: use the documented double-underscore spelling __declspec; _mm_load_si128 below
// requires this buffer to be 16-byte aligned.
__declspec(align(16)) unsigned int elem[4] = { PermuteX, PermuteY, PermuteZ, PermuteW };
__m128i vControl = _mm_load_si128( reinterpret_cast<const __m128i *>(&elem[0]) );
// Lanes whose index is > 3 must come from V2.
__m128i vSelect = _mm_cmpgt_epi32( vControl, three );
// Reduce each index to 0-3 for the per-vector variable permutes.
vControl = _mm_castps_si128( _mm_and_ps( _mm_castsi128_ps( vControl ), three ) );
__m128 shuffled1 = _mm_permutevar_ps( V1, vControl );
__m128 shuffled2 = _mm_permutevar_ps( V2, vControl );
// Merge: take shuffled2 where vSelect is set, shuffled1 elsewhere.
__m128 masked1 = _mm_andnot_ps( _mm_castsi128_ps( vSelect ), shuffled1 );
__m128 masked2 = _mm_and_ps( _mm_castsi128_ps( vSelect ), shuffled2 );
return _mm_or_ps( masked1, masked2 );
}
// Shifts the 8-element pair {V1,V2} left by Elements and returns the first four lanes.
inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, uint32_t Elements)
{
assert( Elements < 4 );
_Analysis_assume_( Elements < 4 );
return AVX::XMVectorPermute(V1, V2, Elements, ((Elements) + 1), ((Elements) + 2), ((Elements) + 3));
}
// Rotates the components of V left by Elements positions (wraps around).
inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V, uint32_t Elements)
{
assert( Elements < 4 );
_Analysis_assume_( Elements < 4 );
return AVX::XMVectorSwizzle( V, Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3 );
}
// Rotates the components of V right by Elements positions (wraps around).
inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V, uint32_t Elements)
{
assert( Elements < 4 );
_Analysis_assume_( Elements < 4 );
return AVX::XMVectorSwizzle( V, (4 - (Elements)) & 3, (5 - (Elements)) & 3, (6 - (Elements)) & 3, (7 - (Elements)) & 3 );
}
//-------------------------------------------------------------------------------------
// Permute Templates
//-------------------------------------------------------------------------------------
namespace Internal
{
// Slow path fallback for permutes that do not map to a single SSE opcode.
template<uint32_t Shuffle, bool WhichX, bool WhichY, bool WhichZ, bool WhichW> struct PermuteHelper
{
static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2)
{
// Per-lane mask: all-ones selects the lane from v2, zero selects it from v1.
static const XMVECTORU32 selectMask =
{
WhichX ? 0xFFFFFFFF : 0,
WhichY ? 0xFFFFFFFF : 0,
WhichZ ? 0xFFFFFFFF : 0,
WhichW ? 0xFFFFFFFF : 0,
};
// Apply the same shuffle to both sources, then blend them with the mask.
XMVECTOR shuffled1 = _mm_permute_ps(v1, Shuffle);
XMVECTOR shuffled2 = _mm_permute_ps(v2, Shuffle);
XMVECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1);
XMVECTOR masked2 = _mm_and_ps(selectMask, shuffled2);
return _mm_or_ps(masked1, masked2);
}
};
// Fast path for permutes that only read from the first vector.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, false, false>
{
static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { (v2); return _mm_permute_ps(v1, Shuffle); }
};
// Fast path for permutes that only read from the second vector.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, true, true>
{
static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2){ (v1); return _mm_permute_ps(v2, Shuffle); }
};
// Fast path for permutes that read XY from the first vector, ZW from the second.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, true, true>
{
static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v1, v2, Shuffle); }
};
// Fast path for permutes that read XY from the second vector, ZW from the first.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, false, false>
{
static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v2, v1, Shuffle); }
};
};
// General permute template: compile-time indices 0-7 select lanes from V1 (0-3) or V2 (4-7).
template<uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW>
inline XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2)
{
static_assert(PermuteX <= 7, "PermuteX template parameter out of range");
static_assert(PermuteY <= 7, "PermuteY template parameter out of range");
static_assert(PermuteZ <= 7, "PermuteZ template parameter out of range");
static_assert(PermuteW <= 7, "PermuteW template parameter out of range");
// Low two bits of each index form the shuffle control; indices > 3 flag the second source.
const uint32_t Shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3);
const bool WhichX = PermuteX > 3;
const bool WhichY = PermuteY > 3;
const bool WhichZ = PermuteZ > 3;
const bool WhichW = PermuteW > 3;
// PermuteHelper specializations collapse common source patterns to a single instruction.
return AVX::Internal::PermuteHelper<Shuffle, WhichX, WhichY, WhichZ, WhichW>::Permute(V1, V2);
}
// Special-case permute templates
// Identity pass-throughs, plus every whole-lane merge mapped to one SSE4.1 blendps
// (the immediate's bit n selects lane n from V2).
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { (V2); return V1; }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { (V1); return V2; }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x1); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x2); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x3); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x4); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x5); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x6); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x7); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x8); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x9); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xA); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xB); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xC); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xD); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xE); }
//-------------------------------------------------------------------------------------
// Swizzle Templates
//-------------------------------------------------------------------------------------
// General swizzle template: compile-time lane reorder of a single vector (one vpermilps).
template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW>
inline XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V)
{
static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
return _mm_permute_ps( V, _MM_SHUFFLE( SwizzleW, SwizzleZ, SwizzleY, SwizzleX ) );
}
// Specialized swizzles: identity, and the SSE3 even/odd duplicate instructions.
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return _mm_moveldup_ps(V); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return _mm_movehdup_ps(V); }
//-------------------------------------------------------------------------------------
// Other Templates
//-------------------------------------------------------------------------------------
// Shifts the 8-element pair {V1,V2} left by Elements and returns the first four lanes.
template<uint32_t Elements>
inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2)
{
static_assert( Elements < 4, "Elements template parameter out of range" );
return AVX::XMVectorPermute<Elements, (Elements + 1), (Elements + 2), (Elements + 3)>(V1, V2);
}
// Rotates the components of V left by Elements positions (wraps around).
template<uint32_t Elements>
inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V)
{
static_assert( Elements < 4, "Elements template parameter out of range" );
return AVX::XMVectorSwizzle<Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3>(V);
}
// Rotates the components of V right by Elements positions (wraps around).
template<uint32_t Elements>
inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V)
{
static_assert( Elements < 4, "Elements template parameter out of range" );
return AVX::XMVectorSwizzle<(4 - Elements) & 3, (5 - Elements) & 3, (6 - Elements) & 3, (7 - Elements) & 3>(V);
}
}; // namespace AVX
}; // namespace DirectX;
//-------------------------------------------------------------------------------------
// DirectXMathAVX.h -- AVX (version 1) extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error AVX not supported on ARM platform
#endif
#if defined(_MSC_VER) && (_MSC_VER < 1600)
#error AVX intrinsics requires Visual C++ 2010 Service Pack 1 or later.
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <immintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
namespace AVX
{
inline bool XMVerifyAVXSupport()
{
// Runtime check that both the CPU and the OS support AVX before using any AVX:: routine.
// Should return true for AMD Bulldozer, Intel "Sandy Bridge", and Intel "Ivy Bridge" or later processors
// with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
// Leaf 0 reports the highest supported standard CPUID leaf; bail out if leaf 1 is unavailable.
__cpuid( CPUInfo, 0 );
if ( CPUInfo[0] < 1 )
return false;
__cpuid(CPUInfo, 1 );
// We check ECX for AVX (bit 28), OSXSAVE (bit 27), SSE4.1 (bit 19), and SSE3 (bit 0)
return ( (CPUInfo[2] & 0x18080001) == 0x18080001 );
}
//-------------------------------------------------------------------------------------
// Vector
//-------------------------------------------------------------------------------------
// Broadcasts a single float loaded from memory into all four lanes (AVX vbroadcastss).
inline XMVECTOR XM_CALLCONV XMVectorReplicatePtr( _In_ const float *pValue )
{
return _mm_broadcast_ss( pValue );
}
// Replicates the X component of V into all four lanes (single vpermilps).
inline XMVECTOR XM_CALLCONV XMVectorSplatX( FXMVECTOR V )
{
return _mm_permute_ps( V, _MM_SHUFFLE(0, 0, 0, 0) );
}
// Replicates the Y component of V into all four lanes.
inline XMVECTOR XM_CALLCONV XMVectorSplatY( FXMVECTOR V )
{
return _mm_permute_ps( V, _MM_SHUFFLE(1, 1, 1, 1) );
}
// Replicates the Z component of V into all four lanes.
inline XMVECTOR XM_CALLCONV XMVectorSplatZ( FXMVECTOR V )
{
return _mm_permute_ps( V, _MM_SHUFFLE(2, 2, 2, 2) );
}
// Replicates the W component of V into all four lanes.
inline XMVECTOR XM_CALLCONV XMVectorSplatW( FXMVECTOR V )
{
return _mm_permute_ps( V, _MM_SHUFFLE(3, 3, 3, 3) );
}
// Reorders the components of V by the runtime indices E0..E3 (each must select a source lane 0-3).
inline XMVECTOR XM_CALLCONV XMVectorSwizzle( FXMVECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3 )
{
assert( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
_Analysis_assume_( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
unsigned int elem[4] = { E0, E1, E2, E3 };
// Unaligned load of the control indices; vpermilps selects each lane by a variable control.
__m128i vControl = _mm_loadu_si128( reinterpret_cast<const __m128i *>(&elem[0]) );
return _mm_permutevar_ps( V, vControl );
}
// Selects one element per lane from the pair {V1,V2}: indices 0-3 pick from V1, 4-7 from V2.
inline XMVECTOR XM_CALLCONV XMVectorPermute( FXMVECTOR V1, FXMVECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW )
{
assert( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
_Analysis_assume_( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
static const XMVECTORU32 three = { 3, 3, 3, 3 };
// Fixed: use the documented double-underscore spelling __declspec; _mm_load_si128 below
// requires this buffer to be 16-byte aligned.
__declspec(align(16)) unsigned int elem[4] = { PermuteX, PermuteY, PermuteZ, PermuteW };
__m128i vControl = _mm_load_si128( reinterpret_cast<const __m128i *>(&elem[0]) );
// Lanes whose index is > 3 must come from V2.
__m128i vSelect = _mm_cmpgt_epi32( vControl, three );
// Reduce each index to 0-3 for the per-vector variable permutes.
vControl = _mm_castps_si128( _mm_and_ps( _mm_castsi128_ps( vControl ), three ) );
__m128 shuffled1 = _mm_permutevar_ps( V1, vControl );
__m128 shuffled2 = _mm_permutevar_ps( V2, vControl );
// Merge: take shuffled2 where vSelect is set, shuffled1 elsewhere.
__m128 masked1 = _mm_andnot_ps( _mm_castsi128_ps( vSelect ), shuffled1 );
__m128 masked2 = _mm_and_ps( _mm_castsi128_ps( vSelect ), shuffled2 );
return _mm_or_ps( masked1, masked2 );
}
// Shifts the 8-element pair {V1,V2} left by Elements and returns the first four lanes.
inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, uint32_t Elements)
{
assert( Elements < 4 );
_Analysis_assume_( Elements < 4 );
return AVX::XMVectorPermute(V1, V2, Elements, ((Elements) + 1), ((Elements) + 2), ((Elements) + 3));
}
// Rotates the components of V left by Elements positions (wraps around).
inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V, uint32_t Elements)
{
assert( Elements < 4 );
_Analysis_assume_( Elements < 4 );
return AVX::XMVectorSwizzle( V, Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3 );
}
// Rotates the components of V right by Elements positions (wraps around).
inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V, uint32_t Elements)
{
assert( Elements < 4 );
_Analysis_assume_( Elements < 4 );
return AVX::XMVectorSwizzle( V, (4 - (Elements)) & 3, (5 - (Elements)) & 3, (6 - (Elements)) & 3, (7 - (Elements)) & 3 );
}
//-------------------------------------------------------------------------------------
// Permute Templates
//-------------------------------------------------------------------------------------
namespace Internal
{
// Slow path fallback for permutes that do not map to a single SSE opcode.
template<uint32_t Shuffle, bool WhichX, bool WhichY, bool WhichZ, bool WhichW> struct PermuteHelper
{
static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2)
{
// Per-lane mask: all-ones selects the lane from v2, zero selects it from v1.
static const XMVECTORU32 selectMask =
{
WhichX ? 0xFFFFFFFF : 0,
WhichY ? 0xFFFFFFFF : 0,
WhichZ ? 0xFFFFFFFF : 0,
WhichW ? 0xFFFFFFFF : 0,
};
// Apply the same shuffle to both sources, then blend them with the mask.
XMVECTOR shuffled1 = _mm_permute_ps(v1, Shuffle);
XMVECTOR shuffled2 = _mm_permute_ps(v2, Shuffle);
XMVECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1);
XMVECTOR masked2 = _mm_and_ps(selectMask, shuffled2);
return _mm_or_ps(masked1, masked2);
}
};
// Fast path for permutes that only read from the first vector.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, false, false>
{
static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { (v2); return _mm_permute_ps(v1, Shuffle); }
};
// Fast path for permutes that only read from the second vector.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, true, true>
{
static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2){ (v1); return _mm_permute_ps(v2, Shuffle); }
};
// Fast path for permutes that read XY from the first vector, ZW from the second.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, true, true>
{
static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v1, v2, Shuffle); }
};
// Fast path for permutes that read XY from the second vector, ZW from the first.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, false, false>
{
static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v2, v1, Shuffle); }
};
};
// General permute template: compile-time indices 0-7 select lanes from V1 (0-3) or V2 (4-7).
template<uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW>
inline XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2)
{
static_assert(PermuteX <= 7, "PermuteX template parameter out of range");
static_assert(PermuteY <= 7, "PermuteY template parameter out of range");
static_assert(PermuteZ <= 7, "PermuteZ template parameter out of range");
static_assert(PermuteW <= 7, "PermuteW template parameter out of range");
// Low two bits of each index form the shuffle control; indices > 3 flag the second source.
const uint32_t Shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3);
const bool WhichX = PermuteX > 3;
const bool WhichY = PermuteY > 3;
const bool WhichZ = PermuteZ > 3;
const bool WhichW = PermuteW > 3;
// PermuteHelper specializations collapse common source patterns to a single instruction.
return AVX::Internal::PermuteHelper<Shuffle, WhichX, WhichY, WhichZ, WhichW>::Permute(V1, V2);
}
// Special-case permute templates
// Identity pass-throughs, plus every whole-lane merge mapped to one SSE4.1 blendps
// (the immediate's bit n selects lane n from V2).
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { (V2); return V1; }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { (V1); return V2; }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x1); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x2); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x3); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x4); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x5); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x6); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x7); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x8); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x9); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xA); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xB); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xC); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xD); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xE); }
//-------------------------------------------------------------------------------------
// Swizzle Templates
//-------------------------------------------------------------------------------------
// General swizzle template: compile-time lane reorder of a single vector (one vpermilps).
template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW>
inline XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V)
{
static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
return _mm_permute_ps( V, _MM_SHUFFLE( SwizzleW, SwizzleZ, SwizzleY, SwizzleX ) );
}
// Specialized swizzles: identity, and the SSE3 even/odd duplicate instructions.
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return _mm_moveldup_ps(V); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return _mm_movehdup_ps(V); }
//-------------------------------------------------------------------------------------
// Other Templates
//-------------------------------------------------------------------------------------
// Shifts the 8-element pair {V1,V2} left by Elements and returns the first four lanes.
template<uint32_t Elements>
inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2)
{
static_assert( Elements < 4, "Elements template parameter out of range" );
return AVX::XMVectorPermute<Elements, (Elements + 1), (Elements + 2), (Elements + 3)>(V1, V2);
}
// Rotates the components of V left by Elements positions (wraps around).
template<uint32_t Elements>
inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V)
{
static_assert( Elements < 4, "Elements template parameter out of range" );
return AVX::XMVectorSwizzle<Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3>(V);
}
// Rotates the components of V right by Elements positions (wraps around).
template<uint32_t Elements>
inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V)
{
static_assert( Elements < 4, "Elements template parameter out of range" );
return AVX::XMVectorSwizzle<(4 - Elements) & 3, (5 - Elements) & 3, (6 - Elements) & 3, (7 - Elements) & 3>(V);
}
}; // namespace AVX
}; // namespace DirectX;

File diff suppressed because it is too large Load Diff

View File

@ -1,103 +1,103 @@
//-------------------------------------------------------------------------------------
// DirectXMathBE.h -- Big-endian swap extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#ifndef _M_ARM
#include <tmmintrin.h>
#endif
#include <DirectXMath.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
// Byte-swaps each 32-bit component of V (little-endian <-> big-endian).
inline XMVECTOR XM_CALLCONV XMVectorEndian
(
FXMVECTOR V
)
{
#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
// NEON path: vtbl2 table lookup with per-byte indices reverses the bytes of every 32-bit lane.
static const XMVECTORU32 idx = { 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F };
int8x8x2_t tbl;
tbl.val[0] = vget_low_f32(V);
tbl.val[1] = vget_high_f32(V);
const __n64 rL = vtbl2_u8( tbl, vget_low_f32(idx) );
const __n64 rH = vtbl2_u8( tbl, vget_high_f32(idx) );
return vcombine_f32( rL, rH );
#else
// Scalar fallback: reverse the four bytes of each element with shifts and masks.
XMVECTORU32 E;
E.v = V;
uint32_t value = E.u[0];
E.u[0] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
value = E.u[1];
E.u[1] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
value = E.u[2];
E.u[2] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
value = E.u[3];
E.u[3] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
return E.v;
#endif
}
#ifndef _M_ARM
namespace SSSE3
{
inline bool XMVerifySSSE3Support()
{
// Runtime check for SSSE3 before calling SSSE3::XMVectorEndian.
// Should return true on AMD Bulldozer, Intel Core i7/i5/i3, Intel Atom, or later processors
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
__cpuid( CPUInfo, 0 );
if ( CPUInfo[0] < 1 )
return false;
__cpuid(CPUInfo, 1 );
// Check for SSSE3 instruction set (ECX bit 9).
return ( (CPUInfo[2] & 0x200) != 0 );
}
// Byte-swaps each 32-bit component of V with a single pshufb byte shuffle.
inline XMVECTOR XM_CALLCONV XMVectorEndian
(
FXMVECTOR V
)
{
static const XMVECTORU32 idx = { 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F };
__m128i Result = _mm_shuffle_epi8( _mm_castps_si128(V), idx );
return _mm_castsi128_ps( Result );
}
}; // namespace SSSE3
#endif // !_M_ARM
//-------------------------------------------------------------------------------------
// DirectXMathBE.h -- Big-endian swap extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#ifndef _M_ARM
#include <tmmintrin.h>
#endif
#include <DirectXMath.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
// Byte-swaps each 32-bit component of V (little-endian <-> big-endian).
inline XMVECTOR XM_CALLCONV XMVectorEndian
(
FXMVECTOR V
)
{
#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
// NEON path: vtbl2 table lookup with per-byte indices reverses the bytes of every 32-bit lane.
static const XMVECTORU32 idx = { 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F };
int8x8x2_t tbl;
tbl.val[0] = vget_low_f32(V);
tbl.val[1] = vget_high_f32(V);
const __n64 rL = vtbl2_u8( tbl, vget_low_f32(idx) );
const __n64 rH = vtbl2_u8( tbl, vget_high_f32(idx) );
return vcombine_f32( rL, rH );
#else
// Scalar fallback: reverse the four bytes of each element with shifts and masks.
XMVECTORU32 E;
E.v = V;
uint32_t value = E.u[0];
E.u[0] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
value = E.u[1];
E.u[1] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
value = E.u[2];
E.u[2] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
value = E.u[3];
E.u[3] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
return E.v;
#endif
}
#ifndef _M_ARM
namespace SSSE3
{
inline bool XMVerifySSSE3Support()
{
// Runtime check for SSSE3 before calling SSSE3::XMVectorEndian.
// Should return true on AMD Bulldozer, Intel Core i7/i5/i3, Intel Atom, or later processors
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
__cpuid( CPUInfo, 0 );
if ( CPUInfo[0] < 1 )
return false;
__cpuid(CPUInfo, 1 );
// Check for SSSE3 instruction set (ECX bit 9).
return ( (CPUInfo[2] & 0x200) != 0 );
}
// Byte-swaps each 32-bit component of V with a single pshufb byte shuffle.
inline XMVECTOR XM_CALLCONV XMVectorEndian
(
FXMVECTOR V
)
{
static const XMVECTORU32 idx = { 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F };
__m128i Result = _mm_shuffle_epi8( _mm_castps_si128(V), idx );
return _mm_castsi128_ps( Result );
}
}; // namespace SSSE3
#endif // !_M_ARM
}; // namespace DirectX;

View File

@ -1,410 +1,410 @@
//-------------------------------------------------------------------------------------
// DirectXMathF16C.h -- F16C/CVT16 extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error F16C not supported on ARM platform
#endif
#if defined(_MSC_VER) && (_MSC_VER < 1700)
#error F16C/CVT16 intrinsics requires Visual C++ 2012 or later.
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <immintrin.h>
#include <DirectXMath.h>
#include <DirectXPackedVector.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
namespace F16C
{
inline bool XMVerifyF16CSupport()
{
// Runtime check that the CPU and OS support the F16C half-precision conversion instructions.
// Should return true for AMD "Piledriver" and Intel "Ivy Bridge" processors
// with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
// Leaf 0 reports the highest supported standard CPUID leaf; bail out if leaf 1 is unavailable.
__cpuid( CPUInfo, 0 );
if ( CPUInfo[0] < 1 )
return false;
__cpuid(CPUInfo, 1 );
// We check ECX for F16C (bit 29), AVX (bit 28), OSXSAVE (bit 27), and SSE4.1 (bit 19)
return ( (CPUInfo[2] & 0x38080000 ) == 0x38080000 );
}
//-------------------------------------------------------------------------------------
// Data conversion
//-------------------------------------------------------------------------------------
// Converts one half-precision (16-bit) value to a 32-bit float using the F16C vcvtph2ps instruction.
inline float XMConvertHalfToFloat( PackedVector::HALF Value )
{
__m128i V1 = _mm_cvtsi32_si128( static_cast<uint32_t>(Value) );
__m128 V2 = _mm_cvtph_ps( V1 );
return _mm_cvtss_f32( V2 );
}
// Converts one 32-bit float to half precision via vcvtps2ph (imm8 0 = round-to-nearest-even).
inline PackedVector::HALF XMConvertFloatToHalf( float Value )
{
__m128 V1 = _mm_set_ss( Value );
__m128i V2 = _mm_cvtps_ph( V1, 0 );
return static_cast<PackedVector::HALF>( _mm_cvtsi128_si32(V2) );
}
inline float* XMConvertHalfToFloatStream
(
    _Out_writes_bytes_(sizeof(float)+OutputStride*(HalfCount-1)) float* pOutputStream,
    _In_ size_t OutputStride,
    _In_reads_bytes_(2+InputStride*(HalfCount-1)) const PackedVector::HALF* pInputStream,
    _In_ size_t InputStride,
    _In_ size_t HalfCount
)
{
    // Converts HalfCount half-precision values to single-precision floats,
    // reading every InputStride bytes and writing every OutputStride bytes.
    // Uses F16C's VCVTPH2PS to convert four elements per iteration whenever
    // at least one of the streams is contiguous; the scalar loop at the end
    // handles the 0-3 element remainder and the fully-scattered case.
    // Returns pOutputStream.
    using namespace PackedVector;

    assert(pOutputStream);
    assert(pInputStream);

    // Strides are expressed in bytes, so walk both streams with byte pointers.
    const uint8_t* pHalf = reinterpret_cast<const uint8_t*>(pInputStream);
    uint8_t* pFloat = reinterpret_cast<uint8_t*>(pOutputStream);

    size_t i = 0;
    size_t four = HalfCount >> 2; // number of whole 4-element groups

    if ( four > 0 )
    {
        if (InputStride == sizeof(HALF))
        {
            if (OutputStride == sizeof(float))
            {
                if ( ((uintptr_t)pFloat & 0xF) == 0)
                {
                    // Packed input, aligned & packed output:
                    // non-temporal stores avoid polluting the cache with
                    // write-only data (requires 16-byte alignment).
                    for (size_t j = 0; j < four; ++j)
                    {
                        __m128i HV = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pHalf) );
                        pHalf += InputStride*4;

                        __m128 FV = _mm_cvtph_ps( HV );

                        _mm_stream_ps( reinterpret_cast<float*>(pFloat), FV );
                        pFloat += OutputStride*4;
                        i += 4;
                    }

                    // Streaming (non-temporal) stores are weakly ordered;
                    // fence so the results are globally visible on return.
                    _mm_sfence();
                }
                else
                {
                    // Packed input, packed output (unaligned destination)
                    for (size_t j = 0; j < four; ++j)
                    {
                        __m128i HV = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pHalf) );
                        pHalf += InputStride*4;

                        __m128 FV = _mm_cvtph_ps( HV );

                        _mm_storeu_ps( reinterpret_cast<float*>(pFloat), FV );
                        pFloat += OutputStride*4;
                        i += 4;
                    }
                }
            }
            else
            {
                // Packed input, scattered output
                for (size_t j = 0; j < four; ++j)
                {
                    __m128i HV = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pHalf) );
                    pHalf += InputStride*4;

                    __m128 FV = _mm_cvtph_ps( HV );

                    // _mm_extract_ps returns the lane's bit pattern as an int,
                    // so store through an int* to avoid a float round-trip.
                    _mm_store_ss( reinterpret_cast<float*>(pFloat), FV );
                    pFloat += OutputStride;
                    *reinterpret_cast<int*>(pFloat) = _mm_extract_ps( FV, 1 );
                    pFloat += OutputStride;
                    *reinterpret_cast<int*>(pFloat) = _mm_extract_ps( FV, 2 );
                    pFloat += OutputStride;
                    *reinterpret_cast<int*>(pFloat) = _mm_extract_ps( FV, 3 );
                    pFloat += OutputStride;
                    i += 4;
                }
            }
        }
        else if (OutputStride == sizeof(float))
        {
            if ( ((uintptr_t)pFloat & 0xF) == 0)
            {
                // Scattered input, aligned & packed output
                for (size_t j = 0; j < four; ++j)
                {
                    // Gather four halves, one per input stride step.
                    uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;
                    uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;
                    uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;
                    uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;

                    __m128i HV = _mm_setzero_si128();
                    HV = _mm_insert_epi16( HV, H1, 0 );
                    HV = _mm_insert_epi16( HV, H2, 1 );
                    HV = _mm_insert_epi16( HV, H3, 2 );
                    HV = _mm_insert_epi16( HV, H4, 3 );
                    __m128 FV = _mm_cvtph_ps( HV );

                    _mm_stream_ps( reinterpret_cast<float*>(pFloat ), FV );
                    pFloat += OutputStride*4;
                    i += 4;
                }

                // Fence the non-temporal stores before returning (see above).
                _mm_sfence();
            }
            else
            {
                // Scattered input, packed output
                for (size_t j = 0; j < four; ++j)
                {
                    uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;
                    uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;
                    uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;
                    uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;

                    __m128i HV = _mm_setzero_si128();
                    HV = _mm_insert_epi16( HV, H1, 0 );
                    HV = _mm_insert_epi16( HV, H2, 1 );
                    HV = _mm_insert_epi16( HV, H3, 2 );
                    HV = _mm_insert_epi16( HV, H4, 3 );
                    __m128 FV = _mm_cvtph_ps( HV );

                    _mm_storeu_ps( reinterpret_cast<float*>(pFloat ), FV );
                    pFloat += OutputStride*4;
                    i += 4;
                }
            }
        }
        // When both streams are scattered there is no vector path: i stays 0
        // and the scalar loop below converts every element.
    }

    // Scalar tail: remaining 0-3 elements, or the whole stream when no
    // vector path applied.
    for (; i < HalfCount; ++i)
    {
        *reinterpret_cast<float*>(pFloat) = XMConvertHalfToFloat(reinterpret_cast<const HALF*>(pHalf)[0]);
        pHalf += InputStride;
        pFloat += OutputStride;
    }

    return pOutputStream;
}
inline PackedVector::HALF* XMConvertFloatToHalfStream
(
_Out_writes_bytes_(2+OutputStride*(FloatCount-1)) PackedVector::HALF* pOutputStream,
_In_ size_t OutputStride,
_In_reads_bytes_(sizeof(float)+InputStride*(FloatCount-1)) const float* pInputStream,
_In_ size_t InputStride,
_In_ size_t FloatCount
)
{
// Converts FloatCount single-precision values to half precision, reading
// every InputStride bytes and writing every OutputStride bytes. Uses F16C's
// VCVTPS2PH (round-to-nearest-even, imm8 = 0) four elements at a time where
// at least one stream is contiguous; the scalar loop at the end handles the
// 0-3 element remainder and the fully-scattered case. Returns pOutputStream.
using namespace PackedVector;
assert(pOutputStream);
assert(pInputStream);
// Strides are expressed in bytes, so walk both streams with byte pointers.
const uint8_t* pFloat = reinterpret_cast<const uint8_t*>(pInputStream);
uint8_t* pHalf = reinterpret_cast<uint8_t*>(pOutputStream);
size_t i = 0;
// Number of whole 4-element groups that can be vectorized.
size_t four = FloatCount >> 2;
if (four > 0)
{
if (InputStride == sizeof(float))
{
if (OutputStride == sizeof(HALF))
{
if ( ((uintptr_t)pFloat & 0xF) == 0)
{
// Aligned and packed input, packed output
for (size_t j = 0; j < four; ++j)
{
__m128 FV = _mm_load_ps( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride*4;
__m128i HV = _mm_cvtps_ph( FV, 0 );
// All four converted halves are written with one 64-bit store.
_mm_storel_epi64( reinterpret_cast<__m128i*>(pHalf), HV );
pHalf += OutputStride*4;
i += 4;
}
}
else
{
// Packed input, packed output
for (size_t j = 0; j < four; ++j)
{
__m128 FV = _mm_loadu_ps( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride*4;
__m128i HV = _mm_cvtps_ph( FV, 0 );
_mm_storel_epi64( reinterpret_cast<__m128i*>(pHalf), HV );
pHalf += OutputStride*4;
i += 4;
}
}
}
else
{
if ( ((uintptr_t)pFloat & 0xF) == 0)
{
// Aligned & packed input, scattered output
for (size_t j = 0; j < four; ++j)
{
__m128 FV = _mm_load_ps( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride*4;
__m128i HV = _mm_cvtps_ph( FV, 0 );
// Scatter the four converted halves one lane at a time.
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 0 ) );
pHalf += OutputStride;
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 1 ) );
pHalf += OutputStride;
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 2 ) );
pHalf += OutputStride;
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 3 ) );
pHalf += OutputStride;
i += 4;
}
}
else
{
// Packed input, scattered output
for (size_t j = 0; j < four; ++j)
{
__m128 FV = _mm_loadu_ps( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride*4;
__m128i HV = _mm_cvtps_ph( FV, 0 );
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 0 ) );
pHalf += OutputStride;
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 1 ) );
pHalf += OutputStride;
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 2 ) );
pHalf += OutputStride;
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 3 ) );
pHalf += OutputStride;
i += 4;
}
}
}
}
else if (OutputStride == sizeof(HALF))
{
// Scattered input, packed output
for (size_t j = 0; j < four; ++j)
{
// Gather four strided floats into one vector: _mm_load_ss zeroes the
// upper lanes for the first element; the broadcast loads are then
// blended in so lanes 0..3 hold elements 1..4 respectively
// (masks 0x2, 0x8, 0xC select lanes 1, 3, and 2-3).
__m128 FV1 = _mm_load_ss( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride;
__m128 FV2 = _mm_broadcast_ss( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride;
__m128 FV3 = _mm_broadcast_ss( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride;
__m128 FV4 = _mm_broadcast_ss( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride;
__m128 FV = _mm_blend_ps( FV1, FV2, 0x2 );
__m128 FT = _mm_blend_ps( FV3, FV4, 0x8 );
FV = _mm_blend_ps( FV, FT, 0xC );
__m128i HV = _mm_cvtps_ph( FV, 0 );
_mm_storel_epi64( reinterpret_cast<__m128i*>(pHalf), HV );
pHalf += OutputStride*4;
i += 4;
}
}
// When both streams are scattered there is no vector path: i stays 0
// and the scalar loop below converts every element.
}
// Scalar tail: remaining 0-3 elements, or the whole stream when no vector
// path applied.
for (; i < FloatCount; ++i)
{
*reinterpret_cast<HALF*>(pHalf) = XMConvertFloatToHalf(reinterpret_cast<const float*>(pFloat)[0]);
pFloat += InputStride;
pHalf += OutputStride;
}
return pOutputStream;
}
//-------------------------------------------------------------------------------------
// Half2
//-------------------------------------------------------------------------------------
// Loads an XMHALF2 (two packed halves) and expands it to a float vector;
// the unused upper lanes convert from zero bits.
inline XMVECTOR XM_CALLCONV XMLoadHalf2( _In_ const PackedVector::XMHALF2* pSource )
{
    assert(pSource);
    // A single 32-bit scalar load pulls in both halves at once.
    const __m128 packed = _mm_load_ss( reinterpret_cast<const float*>(pSource) );
    return _mm_cvtph_ps( _mm_castps_si128( packed ) );
}
// Converts x and y of V to half precision and stores them as an XMHALF2.
inline void XM_CALLCONV XMStoreHalf2( _Out_ PackedVector::XMHALF2* pDestination, _In_ FXMVECTOR V )
{
    assert(pDestination);
    // All four lanes are converted; only the low 32 bits (x, y) are written.
    const __m128i halves = _mm_cvtps_ph( V, 0 );
    _mm_store_ss( reinterpret_cast<float*>(pDestination), _mm_castsi128_ps( halves ) );
}
//-------------------------------------------------------------------------------------
// Half4
//-------------------------------------------------------------------------------------
// Loads an XMHALF4 (four packed halves, 64 bits) and expands it to four floats.
inline XMVECTOR XM_CALLCONV XMLoadHalf4( _In_ const PackedVector::XMHALF4* pSource )
{
    assert(pSource);
    const __m128i packed = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pSource) );
    return _mm_cvtph_ps( packed );
}
// Converts all four lanes of V to half precision and stores them as an XMHALF4.
inline void XM_CALLCONV XMStoreHalf4( _Out_ PackedVector::XMHALF4* pDestination, _In_ FXMVECTOR V )
{
    assert(pDestination);
    const __m128i halves = _mm_cvtps_ph( V, 0 );
    _mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), halves );
}
}; // namespace F16C
//-------------------------------------------------------------------------------------
// DirectXMathF16C.h -- F16C/CVT16 extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error F16C not supported on ARM platform
#endif
#if defined(_MSC_VER) && (_MSC_VER < 1700)
#error F16C/CVT16 intrinsics requires Visual C++ 2012 or later.
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <immintrin.h>
#include <DirectXMath.h>
#include <DirectXPackedVector.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
namespace F16C
{
inline bool XMVerifyF16CSupport()
{
// Returns true when both the CPU and the OS support the F16C instructions.
// Should return true for AMD "Piledriver" and Intel "Ivy Bridge" processors
// with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
__cpuid( CPUInfo, 0 );
// CPUID function 0 reports the highest supported function; we need function 1.
if ( CPUInfo[0] < 1 )
return false;
__cpuid(CPUInfo, 1 );
// We check for F16C, AVX, OSXSAVE, and SSE4.1
// ECX feature bits: F16C (29), AVX (28), OSXSAVE (27), SSE4.1 (19) -> mask 0x38080000.
return ( (CPUInfo[2] & 0x38080000 ) == 0x38080000 );
}
//-------------------------------------------------------------------------------------
// Data conversion
//-------------------------------------------------------------------------------------
// Converts one half-precision value to single precision using F16C (VCVTPH2PS).
inline float XMConvertHalfToFloat( PackedVector::HALF Value )
{
// Place the 16-bit half in the low lane, convert, and extract the scalar.
__m128i V1 = _mm_cvtsi32_si128( static_cast<uint32_t>(Value) );
__m128 V2 = _mm_cvtph_ps( V1 );
return _mm_cvtss_f32( V2 );
}
// Converts one float to half precision using F16C (VCVTPS2PH) with
// round-to-nearest-even (imm8 = 0).
inline PackedVector::HALF XMConvertFloatToHalf( float Value )
{
__m128 V1 = _mm_set_ss( Value );
__m128i V2 = _mm_cvtps_ph( V1, 0 );
return static_cast<PackedVector::HALF>( _mm_cvtsi128_si32(V2) );
}
inline float* XMConvertHalfToFloatStream
(
    _Out_writes_bytes_(sizeof(float)+OutputStride*(HalfCount-1)) float* pOutputStream,
    _In_ size_t OutputStride,
    _In_reads_bytes_(2+InputStride*(HalfCount-1)) const PackedVector::HALF* pInputStream,
    _In_ size_t InputStride,
    _In_ size_t HalfCount
)
{
    // Converts HalfCount half-precision values to single-precision floats,
    // reading every InputStride bytes and writing every OutputStride bytes.
    // Uses F16C's VCVTPH2PS to convert four elements per iteration whenever
    // at least one of the streams is contiguous; the scalar loop at the end
    // handles the 0-3 element remainder and the fully-scattered case.
    // Returns pOutputStream.
    using namespace PackedVector;

    assert(pOutputStream);
    assert(pInputStream);

    // Strides are expressed in bytes, so walk both streams with byte pointers.
    const uint8_t* pHalf = reinterpret_cast<const uint8_t*>(pInputStream);
    uint8_t* pFloat = reinterpret_cast<uint8_t*>(pOutputStream);

    size_t i = 0;
    size_t four = HalfCount >> 2; // number of whole 4-element groups

    if ( four > 0 )
    {
        if (InputStride == sizeof(HALF))
        {
            if (OutputStride == sizeof(float))
            {
                if ( ((uintptr_t)pFloat & 0xF) == 0)
                {
                    // Packed input, aligned & packed output:
                    // non-temporal stores avoid polluting the cache with
                    // write-only data (requires 16-byte alignment).
                    for (size_t j = 0; j < four; ++j)
                    {
                        __m128i HV = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pHalf) );
                        pHalf += InputStride*4;

                        __m128 FV = _mm_cvtph_ps( HV );

                        _mm_stream_ps( reinterpret_cast<float*>(pFloat), FV );
                        pFloat += OutputStride*4;
                        i += 4;
                    }

                    // Streaming (non-temporal) stores are weakly ordered;
                    // fence so the results are globally visible on return.
                    _mm_sfence();
                }
                else
                {
                    // Packed input, packed output (unaligned destination)
                    for (size_t j = 0; j < four; ++j)
                    {
                        __m128i HV = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pHalf) );
                        pHalf += InputStride*4;

                        __m128 FV = _mm_cvtph_ps( HV );

                        _mm_storeu_ps( reinterpret_cast<float*>(pFloat), FV );
                        pFloat += OutputStride*4;
                        i += 4;
                    }
                }
            }
            else
            {
                // Packed input, scattered output
                for (size_t j = 0; j < four; ++j)
                {
                    __m128i HV = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pHalf) );
                    pHalf += InputStride*4;

                    __m128 FV = _mm_cvtph_ps( HV );

                    // _mm_extract_ps returns the lane's bit pattern as an int,
                    // so store through an int* to avoid a float round-trip.
                    _mm_store_ss( reinterpret_cast<float*>(pFloat), FV );
                    pFloat += OutputStride;
                    *reinterpret_cast<int*>(pFloat) = _mm_extract_ps( FV, 1 );
                    pFloat += OutputStride;
                    *reinterpret_cast<int*>(pFloat) = _mm_extract_ps( FV, 2 );
                    pFloat += OutputStride;
                    *reinterpret_cast<int*>(pFloat) = _mm_extract_ps( FV, 3 );
                    pFloat += OutputStride;
                    i += 4;
                }
            }
        }
        else if (OutputStride == sizeof(float))
        {
            if ( ((uintptr_t)pFloat & 0xF) == 0)
            {
                // Scattered input, aligned & packed output
                for (size_t j = 0; j < four; ++j)
                {
                    // Gather four halves, one per input stride step.
                    uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;
                    uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;
                    uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;
                    uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;

                    __m128i HV = _mm_setzero_si128();
                    HV = _mm_insert_epi16( HV, H1, 0 );
                    HV = _mm_insert_epi16( HV, H2, 1 );
                    HV = _mm_insert_epi16( HV, H3, 2 );
                    HV = _mm_insert_epi16( HV, H4, 3 );
                    __m128 FV = _mm_cvtph_ps( HV );

                    _mm_stream_ps( reinterpret_cast<float*>(pFloat ), FV );
                    pFloat += OutputStride*4;
                    i += 4;
                }

                // Fence the non-temporal stores before returning (see above).
                _mm_sfence();
            }
            else
            {
                // Scattered input, packed output
                for (size_t j = 0; j < four; ++j)
                {
                    uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;
                    uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;
                    uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;
                    uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
                    pHalf += InputStride;

                    __m128i HV = _mm_setzero_si128();
                    HV = _mm_insert_epi16( HV, H1, 0 );
                    HV = _mm_insert_epi16( HV, H2, 1 );
                    HV = _mm_insert_epi16( HV, H3, 2 );
                    HV = _mm_insert_epi16( HV, H4, 3 );
                    __m128 FV = _mm_cvtph_ps( HV );

                    _mm_storeu_ps( reinterpret_cast<float*>(pFloat ), FV );
                    pFloat += OutputStride*4;
                    i += 4;
                }
            }
        }
        // When both streams are scattered there is no vector path: i stays 0
        // and the scalar loop below converts every element.
    }

    // Scalar tail: remaining 0-3 elements, or the whole stream when no
    // vector path applied.
    for (; i < HalfCount; ++i)
    {
        *reinterpret_cast<float*>(pFloat) = XMConvertHalfToFloat(reinterpret_cast<const HALF*>(pHalf)[0]);
        pHalf += InputStride;
        pFloat += OutputStride;
    }

    return pOutputStream;
}
inline PackedVector::HALF* XMConvertFloatToHalfStream
(
_Out_writes_bytes_(2+OutputStride*(FloatCount-1)) PackedVector::HALF* pOutputStream,
_In_ size_t OutputStride,
_In_reads_bytes_(sizeof(float)+InputStride*(FloatCount-1)) const float* pInputStream,
_In_ size_t InputStride,
_In_ size_t FloatCount
)
{
// Converts FloatCount single-precision values to half precision, reading
// every InputStride bytes and writing every OutputStride bytes. Uses F16C's
// VCVTPS2PH (round-to-nearest-even, imm8 = 0) four elements at a time where
// at least one stream is contiguous; the scalar loop at the end handles the
// 0-3 element remainder and the fully-scattered case. Returns pOutputStream.
using namespace PackedVector;
assert(pOutputStream);
assert(pInputStream);
// Strides are expressed in bytes, so walk both streams with byte pointers.
const uint8_t* pFloat = reinterpret_cast<const uint8_t*>(pInputStream);
uint8_t* pHalf = reinterpret_cast<uint8_t*>(pOutputStream);
size_t i = 0;
// Number of whole 4-element groups that can be vectorized.
size_t four = FloatCount >> 2;
if (four > 0)
{
if (InputStride == sizeof(float))
{
if (OutputStride == sizeof(HALF))
{
if ( ((uintptr_t)pFloat & 0xF) == 0)
{
// Aligned and packed input, packed output
for (size_t j = 0; j < four; ++j)
{
__m128 FV = _mm_load_ps( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride*4;
__m128i HV = _mm_cvtps_ph( FV, 0 );
// All four converted halves are written with one 64-bit store.
_mm_storel_epi64( reinterpret_cast<__m128i*>(pHalf), HV );
pHalf += OutputStride*4;
i += 4;
}
}
else
{
// Packed input, packed output
for (size_t j = 0; j < four; ++j)
{
__m128 FV = _mm_loadu_ps( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride*4;
__m128i HV = _mm_cvtps_ph( FV, 0 );
_mm_storel_epi64( reinterpret_cast<__m128i*>(pHalf), HV );
pHalf += OutputStride*4;
i += 4;
}
}
}
else
{
if ( ((uintptr_t)pFloat & 0xF) == 0)
{
// Aligned & packed input, scattered output
for (size_t j = 0; j < four; ++j)
{
__m128 FV = _mm_load_ps( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride*4;
__m128i HV = _mm_cvtps_ph( FV, 0 );
// Scatter the four converted halves one lane at a time.
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 0 ) );
pHalf += OutputStride;
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 1 ) );
pHalf += OutputStride;
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 2 ) );
pHalf += OutputStride;
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 3 ) );
pHalf += OutputStride;
i += 4;
}
}
else
{
// Packed input, scattered output
for (size_t j = 0; j < four; ++j)
{
__m128 FV = _mm_loadu_ps( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride*4;
__m128i HV = _mm_cvtps_ph( FV, 0 );
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 0 ) );
pHalf += OutputStride;
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 1 ) );
pHalf += OutputStride;
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 2 ) );
pHalf += OutputStride;
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>( _mm_extract_epi16( HV, 3 ) );
pHalf += OutputStride;
i += 4;
}
}
}
}
else if (OutputStride == sizeof(HALF))
{
// Scattered input, packed output
for (size_t j = 0; j < four; ++j)
{
// Gather four strided floats into one vector: _mm_load_ss zeroes the
// upper lanes for the first element; the broadcast loads are then
// blended in so lanes 0..3 hold elements 1..4 respectively
// (masks 0x2, 0x8, 0xC select lanes 1, 3, and 2-3).
__m128 FV1 = _mm_load_ss( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride;
__m128 FV2 = _mm_broadcast_ss( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride;
__m128 FV3 = _mm_broadcast_ss( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride;
__m128 FV4 = _mm_broadcast_ss( reinterpret_cast<const float*>(pFloat) );
pFloat += InputStride;
__m128 FV = _mm_blend_ps( FV1, FV2, 0x2 );
__m128 FT = _mm_blend_ps( FV3, FV4, 0x8 );
FV = _mm_blend_ps( FV, FT, 0xC );
__m128i HV = _mm_cvtps_ph( FV, 0 );
_mm_storel_epi64( reinterpret_cast<__m128i*>(pHalf), HV );
pHalf += OutputStride*4;
i += 4;
}
}
// When both streams are scattered there is no vector path: i stays 0
// and the scalar loop below converts every element.
}
// Scalar tail: remaining 0-3 elements, or the whole stream when no vector
// path applied.
for (; i < FloatCount; ++i)
{
*reinterpret_cast<HALF*>(pHalf) = XMConvertFloatToHalf(reinterpret_cast<const float*>(pFloat)[0]);
pFloat += InputStride;
pHalf += OutputStride;
}
return pOutputStream;
}
//-------------------------------------------------------------------------------------
// Half2
//-------------------------------------------------------------------------------------
// Loads an XMHALF2 (two packed halves) and expands it to a float vector;
// the unused upper lanes convert from zero bits.
inline XMVECTOR XM_CALLCONV XMLoadHalf2( _In_ const PackedVector::XMHALF2* pSource )
{
assert(pSource);
// A single 32-bit scalar load pulls in both halves at once.
__m128 V = _mm_load_ss( reinterpret_cast<const float*>(pSource) );
return _mm_cvtph_ps( _mm_castps_si128( V ) );
}
// Converts x and y of V to half precision and stores them as an XMHALF2.
inline void XM_CALLCONV XMStoreHalf2( _Out_ PackedVector::XMHALF2* pDestination, _In_ FXMVECTOR V )
{
assert(pDestination);
// All four lanes are converted; only the low 32 bits (x, y) are written.
__m128i V1 = _mm_cvtps_ph( V, 0 );
_mm_store_ss( reinterpret_cast<float*>(pDestination), _mm_castsi128_ps(V1) );
}
//-------------------------------------------------------------------------------------
// Half4
//-------------------------------------------------------------------------------------
// Loads an XMHALF4 (four packed halves, 64 bits) and expands it to four floats.
inline XMVECTOR XM_CALLCONV XMLoadHalf4( _In_ const PackedVector::XMHALF4* pSource )
{
assert(pSource);
__m128i V = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pSource) );
return _mm_cvtph_ps( V );
}
// Converts all four lanes of V to half precision and stores them as an XMHALF4.
inline void XM_CALLCONV XMStoreHalf4( _Out_ PackedVector::XMHALF4* pDestination, _In_ FXMVECTOR V )
{
assert(pDestination);
__m128i V1 = _mm_cvtps_ph( V, 0 );
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), V1 );
}
}; // namespace F16C
}; // namespace DirectX;

View File

@ -1,405 +1,405 @@
//-------------------------------------------------------------------------------------
// DirectXMathFMA3.h -- FMA3 extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error FMA3 not supported on ARM platform
#endif
#if defined(_MSC_VER) && (_MSC_VER < 1700)
#error FMA3 intrinsics requires Visual C++ 2012 or later.
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <immintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
namespace FMA3
{
inline bool XMVerifyFMA3Support()
{
// Returns true when both the CPU and the OS support the FMA3 instructions.
// Should return true for AMD "Piledriver" and Intel "Haswell" processors
// with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
__cpuid( CPUInfo, 0 );
// CPUID function 0 reports the highest supported function; we need function 1.
if ( CPUInfo[0] < 1 )
return false;
__cpuid(CPUInfo, 1 );
// We check for FMA3, AVX, OSXSAVE
// ECX feature bits: AVX (28), OSXSAVE (27), FMA (12) -> mask 0x18001000.
return ( (CPUInfo[2] & 0x18001000) == 0x18001000 );
}
//-------------------------------------------------------------------------------------
// Vector
//-------------------------------------------------------------------------------------
// Computes V1 * V2 + V3 as a single fused multiply-add (one rounding step).
inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd
(
FXMVECTOR V1,
FXMVECTOR V2,
FXMVECTOR V3
)
{
return _mm_fmadd_ps( V1, V2, V3 );
}
// Computes V3 - V1 * V2 as a single fused negated multiply-add.
inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract
(
FXMVECTOR V1,
FXMVECTOR V2,
FXMVECTOR V3
)
{
return _mm_fnmadd_ps( V1, V2, V3 );
}
//-------------------------------------------------------------------------------------
// Vector2
//-------------------------------------------------------------------------------------
// Transforms the 2D vector V by the matrix M, treating V as (x, y, 0, 1):
// result = x*M.r[0] + y*M.r[1] + M.r[3].
inline XMVECTOR XM_CALLCONV XMVector2Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    // Splat Y and fold in the translation row, then splat X and
    // accumulate its row with a fused multiply-add.
    XMVECTOR yyyy = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    XMVECTOR acc  = _mm_fmadd_ps( yyyy, M.r[1], M.r[3] );
    XMVECTOR xxxx = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );
    return _mm_fmadd_ps( xxxx, M.r[0], acc );
}
// Transforms the 2D point V by M (implicit w = 1), then divides the result
// by its w component to return to homogeneous w == 1.
inline XMVECTOR XM_CALLCONV XMVector2TransformCoord
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    XMVECTOR yyyy = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    XMVECTOR acc  = _mm_fmadd_ps( yyyy, M.r[1], M.r[3] );
    XMVECTOR xxxx = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );
    acc = _mm_fmadd_ps( xxxx, M.r[0], acc );
    XMVECTOR wwww = _mm_permute_ps( acc, _MM_SHUFFLE(3,3,3,3) );
    return _mm_div_ps( acc, wwww );
}
// Transforms the 2D direction V by M, treating V as (x, y, 0, 0);
// the translation row M.r[3] is not applied.
inline XMVECTOR XM_CALLCONV XMVector2TransformNormal
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    XMVECTOR yyyy = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    XMVECTOR acc  = _mm_mul_ps( yyyy, M.r[1] );
    XMVECTOR xxxx = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );
    return _mm_fmadd_ps( xxxx, M.r[0], acc );
}
//-------------------------------------------------------------------------------------
// Vector3
//-------------------------------------------------------------------------------------
// Transforms the 3D vector V by M, treating V as (x, y, z, 1):
// result = x*M.r[0] + y*M.r[1] + z*M.r[2] + M.r[3].
inline XMVECTOR XM_CALLCONV XMVector3Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    XMVECTOR zzzz = _mm_permute_ps( V, _MM_SHUFFLE(2,2,2,2) );
    XMVECTOR acc  = _mm_fmadd_ps( zzzz, M.r[2], M.r[3] );
    XMVECTOR yyyy = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    acc = _mm_fmadd_ps( yyyy, M.r[1], acc );
    XMVECTOR xxxx = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );
    return _mm_fmadd_ps( xxxx, M.r[0], acc );
}
// Transforms the 3D point V by M (implicit w = 1), then divides through by
// the resulting w to return to homogeneous w == 1.
inline XMVECTOR XM_CALLCONV XMVector3TransformCoord
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    XMVECTOR zzzz = _mm_permute_ps( V, _MM_SHUFFLE(2,2,2,2) );
    XMVECTOR acc  = _mm_fmadd_ps( zzzz, M.r[2], M.r[3] );
    XMVECTOR yyyy = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    acc = _mm_fmadd_ps( yyyy, M.r[1], acc );
    XMVECTOR xxxx = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );
    acc = _mm_fmadd_ps( xxxx, M.r[0], acc );
    XMVECTOR wwww = _mm_permute_ps( acc, _MM_SHUFFLE(3,3,3,3) );
    return _mm_div_ps( acc, wwww );
}
// Transforms the 3D direction V by M, treating V as (x, y, z, 0);
// the translation row M.r[3] is not applied.
inline XMVECTOR XM_CALLCONV XMVector3TransformNormal
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    XMVECTOR zzzz = _mm_permute_ps( V, _MM_SHUFFLE(2,2,2,2) );
    XMVECTOR acc  = _mm_mul_ps( zzzz, M.r[2] );
    XMVECTOR yyyy = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    acc = _mm_fmadd_ps( yyyy, M.r[1], acc );
    XMVECTOR xxxx = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );
    return _mm_fmadd_ps( xxxx, M.r[0], acc );
}
XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2);
// Maps a world-space position through World * View * Projection, then
// rescales the projected result into the given viewport rectangle and
// depth range.
inline XMVECTOR XM_CALLCONV XMVector3Project
(
    FXMVECTOR V,
    float ViewportX,
    float ViewportY,
    float ViewportWidth,
    float ViewportHeight,
    float ViewportMinZ,
    float ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
    const float halfW = ViewportWidth * 0.5f;
    const float halfH = ViewportHeight * 0.5f;

    // Y is negated because screen space grows downward.
    const XMVECTOR scale  = XMVectorSet( halfW, -halfH, ViewportMaxZ - ViewportMinZ, 0.0f );
    const XMVECTOR offset = XMVectorSet( ViewportX + halfW, ViewportY + halfH, ViewportMinZ, 0.0f );

    XMMATRIX wvp = FMA3::XMMatrixMultiply( World, View );
    wvp = FMA3::XMMatrixMultiply( wvp, Projection );

    const XMVECTOR projected = FMA3::XMVector3TransformCoord( V, wvp );
    return FMA3::XMVectorMultiplyAdd( projected, scale, offset );
}
// Inverse of XMVector3Project: undoes the viewport mapping, then runs the
// screen-space point through the inverse of World * View * Projection.
inline XMVECTOR XM_CALLCONV XMVector3Unproject
(
    FXMVECTOR V,
    float ViewportX,
    float ViewportY,
    float ViewportWidth,
    float ViewportHeight,
    float ViewportMinZ,
    float ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
    // Constant term that re-centers x/y into the [-1, 1] NDC range.
    static const XMVECTORF32 kBias = { -1.0f, 1.0f, 0.0f, 0.0f };

    XMVECTOR scale = XMVectorSet( ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f );
    scale = XMVectorReciprocal( scale );

    XMVECTOR offset = XMVectorSet( -ViewportX, -ViewportY, -ViewportMinZ, 0.0f );
    offset = FMA3::XMVectorMultiplyAdd( scale, offset, kBias.v );

    XMMATRIX wvp = FMA3::XMMatrixMultiply( World, View );
    wvp = FMA3::XMMatrixMultiply( wvp, Projection );
    wvp = XMMatrixInverse( nullptr, wvp );

    const XMVECTOR ndc = FMA3::XMVectorMultiplyAdd( V, scale, offset );
    return FMA3::XMVector3TransformCoord( ndc, wvp );
}
//-------------------------------------------------------------------------------------
// Vector4
//-------------------------------------------------------------------------------------
// Transforms the 4D vector V by M:
// result = x*M.r[0] + y*M.r[1] + z*M.r[2] + w*M.r[3].
inline XMVECTOR XM_CALLCONV XMVector4Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    XMVECTOR wwww = _mm_permute_ps( V, _MM_SHUFFLE(3,3,3,3) );
    XMVECTOR acc  = _mm_mul_ps( wwww, M.r[3] );
    XMVECTOR zzzz = _mm_permute_ps( V, _MM_SHUFFLE(2,2,2,2) );
    acc = _mm_fmadd_ps( zzzz, M.r[2], acc );
    XMVECTOR yyyy = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    acc = _mm_fmadd_ps( yyyy, M.r[1], acc );
    XMVECTOR xxxx = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );
    return _mm_fmadd_ps( xxxx, M.r[0], acc );
}
//-------------------------------------------------------------------------------------
// Matrix
//-------------------------------------------------------------------------------------
// Computes the matrix product M1 * M2 using FMA3: for each row of M1 the
// four components are splatted and multiplied against the rows of M2, with
// the partial products folded together by fused multiply-adds.
inline XMMATRIX XM_CALLCONV XMMatrixMultiply
(
CXMMATRIX M1,
CXMMATRIX M2
)
{
XMMATRIX mResult;
// Use vW to hold the original row
XMVECTOR vW = M1.r[0];
// Splat the component X,Y,Z then W
XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
// Perform the operation on the first row
// row = x*M2.r[0] + y*M2.r[1] + z*M2.r[2] + w*M2.r[3]
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
mResult.r[0] = vX;
// Repeat for the other 3 rows
vW = M1.r[1];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
mResult.r[1] = vX;
vW = M1.r[2];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
mResult.r[2] = vX;
vW = M1.r[3];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
mResult.r[3] = vX;
return mResult;
}
// Computes transpose(M1 * M2): the product rows are generated exactly as in
// XMMatrixMultiply, then transposed in registers with shuffles before being
// returned (avoids a separate XMMatrixTranspose pass).
inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
(
FXMMATRIX M1,
CXMMATRIX M2
)
{
// Use vW to hold the original row
XMVECTOR vW = M1.r[0];
// Splat the component X,Y,Z then W
XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
// Perform the operation on the first row
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
__m128 r0 = vX;
// Repeat for the other 3 rows
vW = M1.r[1];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
__m128 r1 = vX;
vW = M1.r[2];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
__m128 r2 = vX;
vW = M1.r[3];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
__m128 r3 = vX;
// 4x4 in-register transpose (same shuffle pattern as _MM_TRANSPOSE4_PS):
// x.x,x.y,y.x,y.y
XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0));
// x.z,x.w,y.z,y.w
XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2));
// z.x,z.y,w.x,w.y
XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0));
// z.z,z.w,w.z,w.w
XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2));
XMMATRIX mResult;
// x.x,y.x,z.x,w.x
mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
// x.y,y.y,z.y,w.y
mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
// x.z,y.z,z.z,w.z
mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
// x.w,y.w,z.w,w.w
mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
return mResult;
}
}; // namespace FMA3
}; // namespace DirectX;
//-------------------------------------------------------------------------------------
// DirectXMathFMA3.h -- FMA3 extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error FMA3 not supported on ARM platform
#endif
#if defined(_MSC_VER) && (_MSC_VER < 1700)
#error FMA3 intrinsics requires Visual C++ 2012 or later.
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <immintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
namespace FMA3
{
inline bool XMVerifyFMA3Support()
{
// Returns true when both the CPU and the OS support the FMA3 instructions.
// Should return true for AMD "Piledriver" and Intel "Haswell" processors
// with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
__cpuid( CPUInfo, 0 );
// CPUID function 0 reports the highest supported function; we need function 1.
if ( CPUInfo[0] < 1 )
return false;
__cpuid(CPUInfo, 1 );
// We check for FMA3, AVX, OSXSAVE
// ECX feature bits: AVX (28), OSXSAVE (27), FMA (12) -> mask 0x18001000.
return ( (CPUInfo[2] & 0x18001000) == 0x18001000 );
}
//-------------------------------------------------------------------------------------
// Vector
//-------------------------------------------------------------------------------------
// Computes V1 * V2 + V3 as a single fused multiply-add (one rounding step).
inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd
(
FXMVECTOR V1,
FXMVECTOR V2,
FXMVECTOR V3
)
{
return _mm_fmadd_ps( V1, V2, V3 );
}
// Computes V3 - V1 * V2 as a single fused negated multiply-add.
inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract
(
FXMVECTOR V1,
FXMVECTOR V2,
FXMVECTOR V3
)
{
return _mm_fnmadd_ps( V1, V2, V3 );
}
//-------------------------------------------------------------------------------------
// Vector2
//-------------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMVector2Transform
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_fmadd_ps( vResult, M.r[1], M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult );
return vResult;
}
inline XMVECTOR XM_CALLCONV XMVector2TransformCoord
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_fmadd_ps( vResult, M.r[1], M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult );
XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3));
vResult = _mm_div_ps( vResult, W );
return vResult;
}
inline XMVECTOR XM_CALLCONV XMVector2TransformNormal
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_mul_ps( vResult, M.r[1] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult );
return vResult;
}
//-------------------------------------------------------------------------------------
// Vector3
//-------------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMVector3Transform
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
vResult = _mm_fmadd_ps( vResult, M.r[2], M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult );
return vResult;
}
inline XMVECTOR XM_CALLCONV XMVector3TransformCoord
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
vResult = _mm_fmadd_ps( vResult, M.r[2], M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult );
XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3));
vResult = _mm_div_ps( vResult, W );
return vResult;
}
inline XMVECTOR XM_CALLCONV XMVector3TransformNormal
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
vResult = _mm_mul_ps( vResult, M.r[2] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult );
return vResult;
}
XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2);
inline XMVECTOR XM_CALLCONV XMVector3Project
(
FXMVECTOR V,
float ViewportX,
float ViewportY,
float ViewportWidth,
float ViewportHeight,
float ViewportMinZ,
float ViewportMaxZ,
CXMMATRIX Projection,
CXMMATRIX View,
CXMMATRIX World
)
{
const float HalfViewportWidth = ViewportWidth * 0.5f;
const float HalfViewportHeight = ViewportHeight * 0.5f;
XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 0.0f);
XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f);
XMMATRIX Transform = FMA3::XMMatrixMultiply(World, View);
Transform = FMA3::XMMatrixMultiply(Transform, Projection);
XMVECTOR Result = FMA3::XMVector3TransformCoord(V, Transform);
Result = FMA3::XMVectorMultiplyAdd(Result, Scale, Offset);
return Result;
}
inline XMVECTOR XM_CALLCONV XMVector3Unproject
(
FXMVECTOR V,
float ViewportX,
float ViewportY,
float ViewportWidth,
float ViewportHeight,
float ViewportMinZ,
float ViewportMaxZ,
CXMMATRIX Projection,
CXMMATRIX View,
CXMMATRIX World
)
{
static const XMVECTORF32 D = { -1.0f, 1.0f, 0.0f, 0.0f };
XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f);
Scale = XMVectorReciprocal(Scale);
XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
Offset = FMA3::XMVectorMultiplyAdd(Scale, Offset, D.v);
XMMATRIX Transform = FMA3::XMMatrixMultiply(World, View);
Transform = FMA3::XMMatrixMultiply(Transform, Projection);
Transform = XMMatrixInverse(nullptr, Transform);
XMVECTOR Result = FMA3::XMVectorMultiplyAdd(V, Scale, Offset);
return FMA3::XMVector3TransformCoord(Result, Transform);
}
//-------------------------------------------------------------------------------------
// Vector4
//-------------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMVector4Transform
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(3,3,3,3)); // W
vResult = _mm_mul_ps( vResult, M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
vResult = _mm_fmadd_ps( vTemp, M.r[2], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult );
return vResult;
}
//-------------------------------------------------------------------------------------
// Matrix
//-------------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixMultiply
(
CXMMATRIX M1,
CXMMATRIX M2
)
{
XMMATRIX mResult;
// Use vW to hold the original row
XMVECTOR vW = M1.r[0];
// Splat the component X,Y,Z then W
XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
// Perform the operation on the first row
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
mResult.r[0] = vX;
// Repeat for the other 3 rows
vW = M1.r[1];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
mResult.r[1] = vX;
vW = M1.r[2];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
mResult.r[2] = vX;
vW = M1.r[3];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
mResult.r[3] = vX;
return mResult;
}
inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
(
FXMMATRIX M1,
CXMMATRIX M2
)
{
// Use vW to hold the original row
XMVECTOR vW = M1.r[0];
// Splat the component X,Y,Z then W
XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
// Perform the operation on the first row
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
__m128 r0 = vX;
// Repeat for the other 3 rows
vW = M1.r[1];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
__m128 r1 = vX;
vW = M1.r[2];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
__m128 r2 = vX;
vW = M1.r[3];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_fmadd_ps(vY,M2.r[1],vX);
vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
vX = _mm_fmadd_ps(vW,M2.r[3],vX);
__m128 r3 = vX;
// x.x,x.y,y.x,y.y
XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0));
// x.z,x.w,y.z,y.w
XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2));
// z.x,z.y,w.x,w.y
XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0));
// z.z,z.w,w.z,w.w
XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2));
XMMATRIX mResult;
// x.x,y.x,z.x,w.x
mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
// x.y,y.y,z.y,w.y
mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
// x.z,y.z,z.z,w.z
mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
// x.w,y.w,z.w,w.w
mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
return mResult;
}
}; // namespace FMA3
}; // namespace DirectX;

View File

@ -1,414 +1,414 @@
//-------------------------------------------------------------------------------------
// DirectXMathFMA4.h -- FMA4 extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error FMA4 not supported on ARM platform
#endif
#if defined(_MSC_VER) && (_MSC_VER < 1600)
#error FMA4 intrinsics requires Visual C++ 2010 Service Pack 1 or later.
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <ammintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
// Older DirectXMath releases predate the __vectorcall calling-convention support;
// fall back to __fastcall and provide the missing typedefs.
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
namespace FMA4
{
// Runtime check that the CPU and OS support the (AMD-only) FMA4 instruction
// set. Callers must invoke this (once, at startup) before using any other
// function in this namespace; the implementations below execute FMA4
// instructions unconditionally.
inline bool XMVerifyFMA4Support()
{
// Should return true for AMD Bulldozer processors
// with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
__cpuid( CPUInfo, 0 );
if ( CPUInfo[0] < 1 )
return false;
__cpuid(CPUInfo, 1 );
// We check leaf 1 ECX for OSXSAVE (bit 27) and AVX (bit 28), both required to access FMA4 state
if ( (CPUInfo[2] & 0x18000000) != 0x18000000 )
return false;
// FMA4 itself is reported in the extended leaf; make sure it exists first
__cpuid( CPUInfo, 0x80000000 );
if ( CPUInfo[0] < 0x80000001 )
return false;
// We check extended leaf 0x80000001 ECX bit 16 for FMA4
return ( CPUInfo[2] & 0x10000 );
}
//-------------------------------------------------------------------------------------
// Vector
//-------------------------------------------------------------------------------------
// Returns V1 * V2 + V3 per component, fused (single rounding step).
inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd
(
FXMVECTOR V1,
FXMVECTOR V2,
FXMVECTOR V3
)
{
return _mm_macc_ps( V1, V2, V3 );
}
// Returns V3 - (V1 * V2) per component, fused (single rounding step).
inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract
(
FXMVECTOR V1,
FXMVECTOR V2,
FXMVECTOR V3
)
{
return _mm_nmacc_ps( V1, V2, V3 );
}
//-------------------------------------------------------------------------------------
// Vector2
//-------------------------------------------------------------------------------------
// Transforms the 2D vector (V.x, V.y, 0, 1) by matrix M:
// result = X * M.r[0] + Y * M.r[1] + M.r[3] (row 2 is skipped, translation applied).
inline XMVECTOR XM_CALLCONV XMVector2Transform
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_macc_ps( vResult, M.r[1], M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
return vResult;
}
// Transforms the 2D point (V.x, V.y, 0, 1) by M, then divides the whole
// vector by the resulting w to project it back onto w == 1.
inline XMVECTOR XM_CALLCONV XMVector2TransformCoord
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_macc_ps( vResult, M.r[1], M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
// Splat w and divide every component by it (no guard against w == 0).
XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3));
vResult = _mm_div_ps( vResult, W );
return vResult;
}
// Transforms the 2D direction (V.x, V.y) by M using rows 0 and 1 only —
// the translation row M.r[3] is intentionally NOT applied.
inline XMVECTOR XM_CALLCONV XMVector2TransformNormal
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_mul_ps( vResult, M.r[1] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
return vResult;
}
//-------------------------------------------------------------------------------------
// Vector3
//-------------------------------------------------------------------------------------
// Transforms the 3D vector (V.x, V.y, V.z, 1) by matrix M:
// result = X * M.r[0] + Y * M.r[1] + Z * M.r[2] + M.r[3].
inline XMVECTOR XM_CALLCONV XMVector3Transform
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
vResult = _mm_macc_ps( vResult, M.r[2], M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_macc_ps( vTemp, M.r[1], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
return vResult;
}
// Transforms the 3D point (V.x, V.y, V.z, 1) by M, then divides by the
// resulting w to project back onto w == 1.
inline XMVECTOR XM_CALLCONV XMVector3TransformCoord
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
vResult = _mm_macc_ps( vResult, M.r[2], M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_macc_ps( vTemp, M.r[1], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
// Splat w and divide every component by it (no guard against w == 0).
XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3));
vResult = _mm_div_ps( vResult, W );
return vResult;
}
// Transforms the 3D direction (V.x, V.y, V.z) by M using rows 0..2 only —
// the translation row M.r[3] is intentionally NOT applied.
inline XMVECTOR XM_CALLCONV XMVector3TransformNormal
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
vResult = _mm_mul_ps( vResult, M.r[2] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_macc_ps( vTemp, M.r[1], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
return vResult;
}
// Forward declaration so Project/Unproject below can use the FMA4 overload
// defined later in this namespace.
XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2);
// Projects a 3D point from object space into viewport (screen) space by
// applying World * View * Projection, then scaling/offsetting into the
// viewport rectangle and depth range [ViewportMinZ, ViewportMaxZ].
inline XMVECTOR XM_CALLCONV XMVector3Project
(
FXMVECTOR V,
float ViewportX,
float ViewportY,
float ViewportWidth,
float ViewportHeight,
float ViewportMinZ,
float ViewportMaxZ,
CXMMATRIX Projection,
CXMMATRIX View,
CXMMATRIX World
)
{
const float HalfViewportWidth = ViewportWidth * 0.5f;
const float HalfViewportHeight = ViewportHeight * 0.5f;
// Maps clip space [-1,1] x [-1,1] x [0,1] into the viewport rectangle;
// y is negated because screen y grows downward.
XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 0.0f);
XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f);
XMMATRIX Transform = FMA4::XMMatrixMultiply(World, View);
Transform = FMA4::XMMatrixMultiply(Transform, Projection);
XMVECTOR Result = FMA4::XMVector3TransformCoord(V, Transform);
Result = FMA4::XMVectorMultiplyAdd(Result, Scale, Offset);
return Result;
}
// Inverse of XMVector3Project: maps a viewport-space point (with depth in
// [ViewportMinZ, ViewportMaxZ]) back to object space through the inverse of
// World * View * Projection.
inline XMVECTOR XM_CALLCONV XMVector3Unproject
(
FXMVECTOR V,
float ViewportX,
float ViewportY,
float ViewportWidth,
float ViewportHeight,
float ViewportMinZ,
float ViewportMaxZ,
CXMMATRIX Projection,
CXMMATRIX View,
CXMMATRIX World
)
{
// D is the constant term of the screen-to-clip mapping: x -> [-1,1], y -> [1,-1].
static const XMVECTORF32 D = { -1.0f, 1.0f, 0.0f, 0.0f };
XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f);
Scale = XMVectorReciprocal(Scale);
XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
Offset = FMA4::XMVectorMultiplyAdd(Scale, Offset, D.v);
XMMATRIX Transform = FMA4::XMMatrixMultiply(World, View);
Transform = FMA4::XMMatrixMultiply(Transform, Projection);
Transform = XMMatrixInverse(nullptr, Transform);
XMVECTOR Result = FMA4::XMVectorMultiplyAdd(V, Scale, Offset);
return FMA4::XMVector3TransformCoord(Result, Transform);
}
//-------------------------------------------------------------------------------------
// Vector4
//-------------------------------------------------------------------------------------
// Full 4-component transform: result = X*M.r[0] + Y*M.r[1] + Z*M.r[2] + W*M.r[3].
inline XMVECTOR XM_CALLCONV XMVector4Transform
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(3,3,3,3)); // W
vResult = _mm_mul_ps( vResult, M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
vResult = _mm_macc_ps( vTemp, M.r[2], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_macc_ps( vTemp, M.r[1], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
return vResult;
}
//-------------------------------------------------------------------------------------
// Matrix
//-------------------------------------------------------------------------------------
// Computes the row-major product M1 * M2. Each result row is the linear
// combination of M2's rows weighted by the splatted components of the
// corresponding M1 row; the three fused multiply-adds per row replace the
// separate multiply+add pairs of the non-FMA path.
inline XMMATRIX XM_CALLCONV XMMatrixMultiply
(
CXMMATRIX M1,
CXMMATRIX M2
)
{
XMMATRIX mResult;
// Use vW to hold the original row
XMVECTOR vW = M1.r[0];
// Splat the component X,Y,Z then W
XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
// Perform the operation on the first row
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
mResult.r[0] = vX;
// Repeat for the other 3 rows
vW = M1.r[1];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
mResult.r[1] = vX;
vW = M1.r[2];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
mResult.r[2] = vX;
vW = M1.r[3];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
mResult.r[3] = vX;
return mResult;
}
// Computes transpose(M1 * M2) in one pass: the four product rows are built
// exactly as in XMMatrixMultiply, then transposed with the standard
// shuffle-based 4x4 transpose before being stored.
inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
(
FXMMATRIX M1,
CXMMATRIX M2
)
{
// Use vW to hold the original row
XMVECTOR vW = M1.r[0];
// Splat the component X,Y,Z then W
XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
// Perform the operation on the first row
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
__m128 r0 = vX;
// Repeat for the other 3 rows
vW = M1.r[1];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
__m128 r1 = vX;
vW = M1.r[2];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
__m128 r2 = vX;
vW = M1.r[3];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
__m128 r3 = vX;
// 4x4 transpose of r0..r3 via pairwise shuffles:
// x.x,x.y,y.x,y.y
XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0));
// x.z,x.w,y.z,y.w
XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2));
// z.x,z.y,w.x,w.y
XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0));
// z.z,z.w,w.z,w.w
XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2));
XMMATRIX mResult;
// x.x,y.x,z.x,w.x
mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
// x.y,y.y,z.y,w.y
mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
// x.z,y.z,z.z,w.z
mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
// x.w,y.w,z.w,w.w
mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
return mResult;
}
}; // namespace FMA4
}; // namespace DirectX;
//-------------------------------------------------------------------------------------
// DirectXMathFMA4.h -- FMA4 extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error FMA4 not supported on ARM platform
#endif
#if defined(_MSC_VER) && (_MSC_VER < 1600)
#error FMA4 intrinsics requires Visual C++ 2010 Service Pack 1 or later.
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <ammintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
// Older DirectXMath releases predate the __vectorcall calling-convention support;
// fall back to __fastcall and provide the missing typedefs.
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
namespace FMA4
{
// Runtime check that the CPU and OS support the (AMD-only) FMA4 instruction
// set. Callers must invoke this (once, at startup) before using any other
// function in this namespace; the implementations below execute FMA4
// instructions unconditionally.
inline bool XMVerifyFMA4Support()
{
// Should return true for AMD Bulldozer processors
// with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
__cpuid( CPUInfo, 0 );
if ( CPUInfo[0] < 1 )
return false;
__cpuid(CPUInfo, 1 );
// We check leaf 1 ECX for OSXSAVE (bit 27) and AVX (bit 28), both required to access FMA4 state
if ( (CPUInfo[2] & 0x18000000) != 0x18000000 )
return false;
// FMA4 itself is reported in the extended leaf; make sure it exists first
__cpuid( CPUInfo, 0x80000000 );
if ( CPUInfo[0] < 0x80000001 )
return false;
// We check extended leaf 0x80000001 ECX bit 16 for FMA4
return ( CPUInfo[2] & 0x10000 );
}
//-------------------------------------------------------------------------------------
// Vector
//-------------------------------------------------------------------------------------
// Returns V1 * V2 + V3 per component, fused (single rounding step).
inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd
(
FXMVECTOR V1,
FXMVECTOR V2,
FXMVECTOR V3
)
{
return _mm_macc_ps( V1, V2, V3 );
}
// Returns V3 - (V1 * V2) per component, fused (single rounding step).
inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract
(
FXMVECTOR V1,
FXMVECTOR V2,
FXMVECTOR V3
)
{
return _mm_nmacc_ps( V1, V2, V3 );
}
//-------------------------------------------------------------------------------------
// Vector2
//-------------------------------------------------------------------------------------
// Transforms the 2D vector (V.x, V.y, 0, 1) by matrix M:
// result = X * M.r[0] + Y * M.r[1] + M.r[3] (row 2 is skipped, translation applied).
inline XMVECTOR XM_CALLCONV XMVector2Transform
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_macc_ps( vResult, M.r[1], M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
return vResult;
}
// Transforms the 2D point (V.x, V.y, 0, 1) by M, then divides the whole
// vector by the resulting w to project it back onto w == 1.
inline XMVECTOR XM_CALLCONV XMVector2TransformCoord
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_macc_ps( vResult, M.r[1], M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
// Splat w and divide every component by it (no guard against w == 0).
XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3));
vResult = _mm_div_ps( vResult, W );
return vResult;
}
// Transforms the 2D direction (V.x, V.y) by M using rows 0 and 1 only —
// the translation row M.r[3] is intentionally NOT applied.
inline XMVECTOR XM_CALLCONV XMVector2TransformNormal
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_mul_ps( vResult, M.r[1] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
return vResult;
}
//-------------------------------------------------------------------------------------
// Vector3
//-------------------------------------------------------------------------------------
// Transforms the 3D vector (V.x, V.y, V.z, 1) by matrix M:
// result = X * M.r[0] + Y * M.r[1] + Z * M.r[2] + M.r[3].
inline XMVECTOR XM_CALLCONV XMVector3Transform
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
vResult = _mm_macc_ps( vResult, M.r[2], M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_macc_ps( vTemp, M.r[1], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
return vResult;
}
// Transforms the 3D point (V.x, V.y, V.z, 1) by M, then divides by the
// resulting w to project back onto w == 1.
inline XMVECTOR XM_CALLCONV XMVector3TransformCoord
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
vResult = _mm_macc_ps( vResult, M.r[2], M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_macc_ps( vTemp, M.r[1], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
// Splat w and divide every component by it (no guard against w == 0).
XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3));
vResult = _mm_div_ps( vResult, W );
return vResult;
}
// Transforms the 3D direction (V.x, V.y, V.z) by M using rows 0..2 only —
// the translation row M.r[3] is intentionally NOT applied.
inline XMVECTOR XM_CALLCONV XMVector3TransformNormal
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
vResult = _mm_mul_ps( vResult, M.r[2] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_macc_ps( vTemp, M.r[1], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
return vResult;
}
// Forward declaration so Project/Unproject below can use the FMA4 overload
// defined later in this namespace.
XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2);
// Projects a 3D point from object space into viewport (screen) space by
// applying World * View * Projection, then scaling/offsetting into the
// viewport rectangle and depth range [ViewportMinZ, ViewportMaxZ].
inline XMVECTOR XM_CALLCONV XMVector3Project
(
FXMVECTOR V,
float ViewportX,
float ViewportY,
float ViewportWidth,
float ViewportHeight,
float ViewportMinZ,
float ViewportMaxZ,
CXMMATRIX Projection,
CXMMATRIX View,
CXMMATRIX World
)
{
const float HalfViewportWidth = ViewportWidth * 0.5f;
const float HalfViewportHeight = ViewportHeight * 0.5f;
// Maps clip space [-1,1] x [-1,1] x [0,1] into the viewport rectangle;
// y is negated because screen y grows downward.
XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 0.0f);
XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f);
XMMATRIX Transform = FMA4::XMMatrixMultiply(World, View);
Transform = FMA4::XMMatrixMultiply(Transform, Projection);
XMVECTOR Result = FMA4::XMVector3TransformCoord(V, Transform);
Result = FMA4::XMVectorMultiplyAdd(Result, Scale, Offset);
return Result;
}
// Inverse of XMVector3Project: maps a viewport-space point (with depth in
// [ViewportMinZ, ViewportMaxZ]) back to object space through the inverse of
// World * View * Projection.
inline XMVECTOR XM_CALLCONV XMVector3Unproject
(
FXMVECTOR V,
float ViewportX,
float ViewportY,
float ViewportWidth,
float ViewportHeight,
float ViewportMinZ,
float ViewportMaxZ,
CXMMATRIX Projection,
CXMMATRIX View,
CXMMATRIX World
)
{
// D is the constant term of the screen-to-clip mapping: x -> [-1,1], y -> [1,-1].
static const XMVECTORF32 D = { -1.0f, 1.0f, 0.0f, 0.0f };
XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f);
Scale = XMVectorReciprocal(Scale);
XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
Offset = FMA4::XMVectorMultiplyAdd(Scale, Offset, D.v);
XMMATRIX Transform = FMA4::XMMatrixMultiply(World, View);
Transform = FMA4::XMMatrixMultiply(Transform, Projection);
Transform = XMMatrixInverse(nullptr, Transform);
XMVECTOR Result = FMA4::XMVectorMultiplyAdd(V, Scale, Offset);
return FMA4::XMVector3TransformCoord(Result, Transform);
}
//-------------------------------------------------------------------------------------
// Vector4
//-------------------------------------------------------------------------------------
// Full 4-component transform: result = X*M.r[0] + Y*M.r[1] + Z*M.r[2] + W*M.r[3].
inline XMVECTOR XM_CALLCONV XMVector4Transform
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(3,3,3,3)); // W
vResult = _mm_mul_ps( vResult, M.r[3] );
XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
vResult = _mm_macc_ps( vTemp, M.r[2], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
vResult = _mm_macc_ps( vTemp, M.r[1], vResult );
vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X
vResult = _mm_macc_ps( vTemp, M.r[0], vResult );
return vResult;
}
//-------------------------------------------------------------------------------------
// Matrix
//-------------------------------------------------------------------------------------
// Computes the row-major product M1 * M2. Each result row is the linear
// combination of M2's rows weighted by the splatted components of the
// corresponding M1 row; the three fused multiply-adds per row replace the
// separate multiply+add pairs of the non-FMA path.
inline XMMATRIX XM_CALLCONV XMMatrixMultiply
(
CXMMATRIX M1,
CXMMATRIX M2
)
{
XMMATRIX mResult;
// Use vW to hold the original row
XMVECTOR vW = M1.r[0];
// Splat the component X,Y,Z then W
XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
// Perform the operation on the first row
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
mResult.r[0] = vX;
// Repeat for the other 3 rows
vW = M1.r[1];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
mResult.r[1] = vX;
vW = M1.r[2];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
mResult.r[2] = vX;
vW = M1.r[3];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
mResult.r[3] = vX;
return mResult;
}
// Computes transpose(M1 * M2) in one pass: the four product rows are built
// exactly as in XMMatrixMultiply, then transposed with the standard
// shuffle-based 4x4 transpose before being stored.
inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
(
FXMMATRIX M1,
CXMMATRIX M2
)
{
// Use vW to hold the original row
XMVECTOR vW = M1.r[0];
// Splat the component X,Y,Z then W
XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
// Perform the operation on the first row
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
__m128 r0 = vX;
// Repeat for the other 3 rows
vW = M1.r[1];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
__m128 r1 = vX;
vW = M1.r[2];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
__m128 r2 = vX;
vW = M1.r[3];
vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vX = _mm_macc_ps(vY,M2.r[1],vX);
vX = _mm_macc_ps(vZ,M2.r[2],vX);
vX = _mm_macc_ps(vW,M2.r[3],vX);
__m128 r3 = vX;
// 4x4 transpose of r0..r3 via pairwise shuffles:
// x.x,x.y,y.x,y.y
XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0));
// x.z,x.w,y.z,y.w
XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2));
// z.x,z.y,w.x,w.y
XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0));
// z.z,z.w,w.z,w.w
XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2));
XMMATRIX mResult;
// x.x,y.x,z.x,w.x
mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
// x.y,y.y,z.y,w.y
mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
// x.z,y.z,z.z,w.z
mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
// x.w,y.w,z.w,w.w
mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
return mResult;
}
}; // namespace FMA4
}; // namespace DirectX;

View File

@ -1,120 +1,120 @@
//-------------------------------------------------------------------------------------
// DirectXMathSSE3.h -- SSE3 extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error SSE3 not supported on ARM platform
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <pmmintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
// SSE3-optimized replacements for a subset of DirectXMath routines.
// Callers should first confirm support with XMVerifySSE3Support().
namespace SSE3
{
// Returns true if the CPU supports the SSE3 instruction set.
inline bool XMVerifySSE3Support()
{
// Should return true on AMD Athlon 64, AMD Phenom, and Intel Pentium 4 or later processors
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
// CPUID function 0 reports the highest supported standard function number.
__cpuid( CPUInfo, 0 );
if ( CPUInfo[0] < 1 )
return false;
// CPUID function 1 returns the feature flags; ECX bit 0 indicates SSE3.
__cpuid(CPUInfo, 1 );
// We only check for SSE3 instruction set. SSSE3 instructions are not used.
return ( (CPUInfo[2] & 0x1) != 0 );
}
// 2D dot product of V1 and V2; the result is replicated into all four lanes.
inline XMVECTOR XM_CALLCONV XMVector2Dot
(
FXMVECTOR V1,
FXMVECTOR V2
)
{
XMVECTOR vTemp = _mm_mul_ps(V1,V2);
// Horizontal add folds lane0+lane1 into lanes 0 and 2.
vTemp = _mm_hadd_ps(vTemp,vTemp);
// Splat lane 0 so every lane holds x1*x2 + y1*y2.
return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(0,0,0,0));
}
// Squared 2D length of V, replicated into all four lanes.
inline XMVECTOR XM_CALLCONV XMVector2LengthSq( FXMVECTOR V )
{
return SSE3::XMVector2Dot(V, V);
}
// 3D dot product of V1 and V2; the result is replicated into all four lanes.
inline XMVECTOR XM_CALLCONV XMVector3Dot
(
FXMVECTOR V1,
FXMVECTOR V2
)
{
XMVECTOR vTemp = _mm_mul_ps(V1,V2);
// Zero the w lane so it does not contribute to the sum.
vTemp = _mm_and_ps( vTemp, g_XMMask3 );
// Two horizontal adds reduce the four lanes to a single broadcast sum.
vTemp = _mm_hadd_ps(vTemp,vTemp);
return _mm_hadd_ps(vTemp,vTemp);
}
// Squared 3D length of V, replicated into all four lanes.
inline XMVECTOR XM_CALLCONV XMVector3LengthSq( FXMVECTOR V )
{
return SSE3::XMVector3Dot(V, V);
}
// 4D dot product of V1 and V2; the result is replicated into all four lanes.
inline XMVECTOR XM_CALLCONV XMVector4Dot
(
FXMVECTOR V1,
FXMVECTOR V2
)
{
XMVECTOR vTemp = _mm_mul_ps(V1,V2);
// Two horizontal adds reduce the four lanes to a single broadcast sum.
vTemp = _mm_hadd_ps( vTemp, vTemp );
return _mm_hadd_ps( vTemp, vTemp );
}
// Squared 4D length of V, replicated into all four lanes.
inline XMVECTOR XM_CALLCONV XMVector4LengthSq( FXMVECTOR V )
{
return SSE3::XMVector4Dot(V, V);
}
// Returns (V.x, V.x, V.z, V.z) in a single instruction.
inline XMVECTOR XM_CALLCONV XMVectorSwizzle_0022( FXMVECTOR V )
{
return _mm_moveldup_ps(V);
}
// Returns (V.y, V.y, V.w, V.w) in a single instruction.
inline XMVECTOR XM_CALLCONV XMVectorSwizzle_1133( FXMVECTOR V )
{
return _mm_movehdup_ps(V);
}
}; // namespace SSE3
//-------------------------------------------------------------------------------------
// DirectXMathSSE3.h -- SSE3 extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error SSE3 not supported on ARM platform
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <pmmintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
// SSE3-optimized replacements for a subset of DirectXMath routines.
// Callers should first confirm support with XMVerifySSE3Support().
namespace SSE3
{
// Returns true if the CPU supports the SSE3 instruction set.
inline bool XMVerifySSE3Support()
{
// Should return true on AMD Athlon 64, AMD Phenom, and Intel Pentium 4 or later processors
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
// CPUID function 0 reports the highest supported standard function number.
__cpuid( CPUInfo, 0 );
if ( CPUInfo[0] < 1 )
return false;
// CPUID function 1 returns the feature flags; ECX bit 0 indicates SSE3.
__cpuid(CPUInfo, 1 );
// We only check for SSE3 instruction set. SSSE3 instructions are not used.
return ( (CPUInfo[2] & 0x1) != 0 );
}
// 2D dot product of V1 and V2; the result is replicated into all four lanes.
inline XMVECTOR XM_CALLCONV XMVector2Dot
(
FXMVECTOR V1,
FXMVECTOR V2
)
{
XMVECTOR vTemp = _mm_mul_ps(V1,V2);
// Horizontal add folds lane0+lane1 into lanes 0 and 2.
vTemp = _mm_hadd_ps(vTemp,vTemp);
// Splat lane 0 so every lane holds x1*x2 + y1*y2.
return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(0,0,0,0));
}
// Squared 2D length of V, replicated into all four lanes.
inline XMVECTOR XM_CALLCONV XMVector2LengthSq( FXMVECTOR V )
{
return SSE3::XMVector2Dot(V, V);
}
// 3D dot product of V1 and V2; the result is replicated into all four lanes.
inline XMVECTOR XM_CALLCONV XMVector3Dot
(
FXMVECTOR V1,
FXMVECTOR V2
)
{
XMVECTOR vTemp = _mm_mul_ps(V1,V2);
// Zero the w lane so it does not contribute to the sum.
vTemp = _mm_and_ps( vTemp, g_XMMask3 );
// Two horizontal adds reduce the four lanes to a single broadcast sum.
vTemp = _mm_hadd_ps(vTemp,vTemp);
return _mm_hadd_ps(vTemp,vTemp);
}
// Squared 3D length of V, replicated into all four lanes.
inline XMVECTOR XM_CALLCONV XMVector3LengthSq( FXMVECTOR V )
{
return SSE3::XMVector3Dot(V, V);
}
// 4D dot product of V1 and V2; the result is replicated into all four lanes.
inline XMVECTOR XM_CALLCONV XMVector4Dot
(
FXMVECTOR V1,
FXMVECTOR V2
)
{
XMVECTOR vTemp = _mm_mul_ps(V1,V2);
// Two horizontal adds reduce the four lanes to a single broadcast sum.
vTemp = _mm_hadd_ps( vTemp, vTemp );
return _mm_hadd_ps( vTemp, vTemp );
}
// Squared 4D length of V, replicated into all four lanes.
inline XMVECTOR XM_CALLCONV XMVector4LengthSq( FXMVECTOR V )
{
return SSE3::XMVector4Dot(V, V);
}
// Returns (V.x, V.x, V.z, V.z) in a single instruction.
inline XMVECTOR XM_CALLCONV XMVectorSwizzle_0022( FXMVECTOR V )
{
return _mm_moveldup_ps(V);
}
// Returns (V.y, V.y, V.w, V.w) in a single instruction.
inline XMVECTOR XM_CALLCONV XMVectorSwizzle_1133( FXMVECTOR V )
{
return _mm_movehdup_ps(V);
}
}; // namespace SSE3
}; // namespace DirectX;

View File

@ -1,422 +1,422 @@
//-------------------------------------------------------------------------------------
// DirectXMathSSE4.h -- SSE4.1 extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error SSE4 not supported on ARM platform
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <smmintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
// SSE4.1-optimized replacements for a subset of DirectXMath routines.
// Callers should first confirm support with XMVerifySSE4Support().
namespace SSE4
{
// Returns true if the CPU supports the SSE4.1 instruction set.
inline bool XMVerifySSE4Support()
{
// Should return true on AMD Bulldozer, Intel Core 2 ("Penryn"), and Intel Core i7 ("Nehalem") or later processors
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
// CPUID function 0 reports the highest supported standard function number.
__cpuid( CPUInfo, 0 );
if ( CPUInfo[0] < 1 )
return false;
// CPUID function 1 returns the feature flags; ECX bit 19 (0x80000) indicates SSE4.1.
__cpuid(CPUInfo, 1 );
// We only check for SSE4.1 instruction set. SSE4.2 instructions are not used.
return ( (CPUInfo[2] & 0x80000) == 0x80000 );
}
//-------------------------------------------------------------------------------------
// Vector
//-------------------------------------------------------------------------------------
// Stores the Y component of V into *y. _mm_extract_ps yields the lane's raw
// bit pattern as an int, so it is written through an int pointer to keep the
// float bits unchanged. NOTE(review): this is the conventional MSVC idiom,
// but the cast is type punning - confirm it is acceptable for all targeted
// compilers (strict aliasing).
inline void XM_CALLCONV XMVectorGetYPtr(_Out_ float *y, _In_ FXMVECTOR V)
{
assert( y != nullptr );
*((int*)y) = _mm_extract_ps( V, 1 );
}
// Stores the Z component of V into *z (see XMVectorGetYPtr for the bit-pattern idiom).
inline void XM_CALLCONV XMVectorGetZPtr(_Out_ float *z, _In_ FXMVECTOR V)
{
assert( z != nullptr );
*((int*)z) = _mm_extract_ps( V, 2 );
}
// Stores the W component of V into *w (see XMVectorGetYPtr for the bit-pattern idiom).
inline void XM_CALLCONV XMVectorGetWPtr(_Out_ float *w, _In_ FXMVECTOR V)
{
assert( w != nullptr );
*((int*)w) = _mm_extract_ps( V, 3 );
}
// Returns the Y component of V reinterpreted as a 32-bit unsigned integer.
inline uint32_t XM_CALLCONV XMVectorGetIntY(FXMVECTOR V)
{
__m128i V1 = _mm_castps_si128( V );
return static_cast<uint32_t>( _mm_extract_epi32( V1, 1 ) );
}
// Returns the Z component of V reinterpreted as a 32-bit unsigned integer.
inline uint32_t XM_CALLCONV XMVectorGetIntZ(FXMVECTOR V)
{
__m128i V1 = _mm_castps_si128( V );
return static_cast<uint32_t>( _mm_extract_epi32( V1, 2 ) );
}
// Returns the W component of V reinterpreted as a 32-bit unsigned integer.
inline uint32_t XM_CALLCONV XMVectorGetIntW(FXMVECTOR V)
{
__m128i V1 = _mm_castps_si128( V );
return static_cast<uint32_t>( _mm_extract_epi32( V1, 3 ) );
}
// Writes the Y component of V, reinterpreted as a 32-bit unsigned integer, to *y.
inline void XM_CALLCONV XMVectorGetIntYPtr(_Out_ uint32_t *y, _In_ FXMVECTOR V)
{
assert( y != nullptr );
__m128i V1 = _mm_castps_si128( V );
*y = static_cast<uint32_t>( _mm_extract_epi32( V1, 1 ) );
}
// Writes the Z component of V, reinterpreted as a 32-bit unsigned integer, to *z.
inline void XM_CALLCONV XMVectorGetIntZPtr(_Out_ uint32_t *z, _In_ FXMVECTOR V)
{
assert( z != nullptr );
__m128i V1 = _mm_castps_si128( V );
*z = static_cast<uint32_t>( _mm_extract_epi32( V1, 2 ) );
}
// Writes the W component of V, reinterpreted as a 32-bit unsigned integer, to *w.
inline void XM_CALLCONV XMVectorGetIntWPtr(_Out_ uint32_t *w, _In_ FXMVECTOR V)
{
assert( w != nullptr );
__m128i V1 = _mm_castps_si128( V );
*w = static_cast<uint32_t>( _mm_extract_epi32( V1, 3 ) );
}
// Returns V with its Y component replaced by y; other components unchanged.
inline XMVECTOR XM_CALLCONV XMVectorSetY(FXMVECTOR V, float y)
{
XMVECTOR vResult = _mm_set_ss(y);
// _mm_insert_ps control 0x10: copy source lane 0 into destination lane 1.
vResult = _mm_insert_ps( V, vResult, 0x10 );
return vResult;
}
// Returns V with its Z component replaced by z; other components unchanged.
inline XMVECTOR XM_CALLCONV XMVectorSetZ(FXMVECTOR V, float z)
{
XMVECTOR vResult = _mm_set_ss(z);
// _mm_insert_ps control 0x20: copy source lane 0 into destination lane 2.
vResult = _mm_insert_ps( V, vResult, 0x20 );
return vResult;
}
// Returns V with its W component replaced by w; other components unchanged.
inline XMVECTOR XM_CALLCONV XMVectorSetW(FXMVECTOR V, float w)
{
XMVECTOR vResult = _mm_set_ss(w);
// _mm_insert_ps control 0x30: copy source lane 0 into destination lane 3.
vResult = _mm_insert_ps( V, vResult, 0x30 );
return vResult;
}
// Returns V with its Y component's bits replaced by the integer y.
inline XMVECTOR XM_CALLCONV XMVectorSetIntY(FXMVECTOR V, uint32_t y)
{
__m128i vResult = _mm_castps_si128( V );
vResult = _mm_insert_epi32( vResult, static_cast<int>(y), 1 );
return _mm_castsi128_ps( vResult );
}
// Returns V with its Z component's bits replaced by the integer z.
inline XMVECTOR XM_CALLCONV XMVectorSetIntZ(FXMVECTOR V, uint32_t z)
{
__m128i vResult = _mm_castps_si128( V );
vResult = _mm_insert_epi32( vResult, static_cast<int>(z), 2 );
return _mm_castsi128_ps( vResult );
}
// Returns V with its W component's bits replaced by the integer w.
inline XMVECTOR XM_CALLCONV XMVectorSetIntW(FXMVECTOR V, uint32_t w)
{
__m128i vResult = _mm_castps_si128( V );
vResult = _mm_insert_epi32( vResult, static_cast<int>(w), 3 );
return _mm_castsi128_ps( vResult );
}
// Rounds each component to the nearest integer (round-to-nearest-even), suppressing exceptions.
inline XMVECTOR XM_CALLCONV XMVectorRound( FXMVECTOR V )
{
return _mm_round_ps( V, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
}
// Rounds each component toward zero, suppressing exceptions.
inline XMVECTOR XM_CALLCONV XMVectorTruncate( FXMVECTOR V )
{
return _mm_round_ps( V, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC );
}
// Rounds each component toward negative infinity.
inline XMVECTOR XM_CALLCONV XMVectorFloor( FXMVECTOR V )
{
return _mm_floor_ps( V );
}
// Rounds each component toward positive infinity.
inline XMVECTOR XM_CALLCONV XMVectorCeiling( FXMVECTOR V )
{
return _mm_ceil_ps( V );
}
//-------------------------------------------------------------------------------------
// Vector2
//-------------------------------------------------------------------------------------
// 2D dot product; dp mask 0x3f multiplies only x and y and broadcasts the sum to all lanes.
inline XMVECTOR XM_CALLCONV XMVector2Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
return _mm_dp_ps( V1, V2, 0x3f );
}
// Squared 2D length of V, replicated into all lanes.
inline XMVECTOR XM_CALLCONV XMVector2LengthSq( FXMVECTOR V )
{
return SSE4::XMVector2Dot(V, V);
}
// Fast, low-precision estimate of 1/||V|| (2D) via the hardware rsqrt approximation.
inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLengthEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f );
return _mm_rsqrt_ps( vTemp );
}
// Full-precision 1/||V|| (2D).
inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLength( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f );
XMVECTOR vLengthSq = _mm_sqrt_ps( vTemp );
return _mm_div_ps( g_XMOne, vLengthSq );
}
// Estimated 2D length of V (full sqrt here; kept for API parity).
inline XMVECTOR XM_CALLCONV XMVector2LengthEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f );
return _mm_sqrt_ps( vTemp );
}
// 2D length of V, replicated into all lanes.
inline XMVECTOR XM_CALLCONV XMVector2Length( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f );
return _mm_sqrt_ps( vTemp );
}
// Fast 2D normalize via rsqrt; no zero- or infinite-length safeguards.
inline XMVECTOR XM_CALLCONV XMVector2NormalizeEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f );
XMVECTOR vResult = _mm_rsqrt_ps( vTemp );
return _mm_mul_ps(vResult, V);
}
// 2D normalize: returns zero for zero-length input and QNaN for infinite-length input.
inline XMVECTOR XM_CALLCONV XMVector2Normalize( FXMVECTOR V )
{
XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x3f );
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Create zero with a single instruction
XMVECTOR vZeroMask = _mm_setzero_ps();
// Test for a divide by zero (Must be FP to detect -0.0)
vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
// Failsafe on zero (Or epsilon) length vectors
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
// Divide to perform the normalization
vResult = _mm_div_ps(V,vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult,vZeroMask);
// Select qnan or result based on infinite length
XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
vResult = _mm_or_ps(vTemp1,vTemp2);
return vResult;
}
//-------------------------------------------------------------------------------------
// Vector3
//-------------------------------------------------------------------------------------
// 3D dot product; dp mask 0x7f multiplies x, y, z and broadcasts the sum to all lanes.
inline XMVECTOR XM_CALLCONV XMVector3Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
return _mm_dp_ps( V1, V2, 0x7f );
}
// Squared 3D length of V, replicated into all lanes.
inline XMVECTOR XM_CALLCONV XMVector3LengthSq( FXMVECTOR V )
{
return SSE4::XMVector3Dot(V, V);
}
// Fast, low-precision estimate of 1/||V|| (3D) via the hardware rsqrt approximation.
inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLengthEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f );
return _mm_rsqrt_ps( vTemp );
}
// Full-precision 1/||V|| (3D).
inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLength( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f );
XMVECTOR vLengthSq = _mm_sqrt_ps( vTemp );
return _mm_div_ps( g_XMOne, vLengthSq );
}
// Estimated 3D length of V (full sqrt here; kept for API parity).
inline XMVECTOR XM_CALLCONV XMVector3LengthEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f );
return _mm_sqrt_ps( vTemp );
}
// 3D length of V, replicated into all lanes.
inline XMVECTOR XM_CALLCONV XMVector3Length( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f );
return _mm_sqrt_ps( vTemp );
}
// Fast 3D normalize via rsqrt; no zero- or infinite-length safeguards.
inline XMVECTOR XM_CALLCONV XMVector3NormalizeEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f );
XMVECTOR vResult = _mm_rsqrt_ps( vTemp );
return _mm_mul_ps(vResult, V);
}
// 3D normalize: returns zero for zero-length input and QNaN for infinite-length input.
inline XMVECTOR XM_CALLCONV XMVector3Normalize( FXMVECTOR V )
{
XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x7f );
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Create zero with a single instruction
XMVECTOR vZeroMask = _mm_setzero_ps();
// Test for a divide by zero (Must be FP to detect -0.0)
vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
// Failsafe on zero (Or epsilon) length vectors
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
// Divide to perform the normalization
vResult = _mm_div_ps(V,vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult,vZeroMask);
// Select qnan or result based on infinite length
XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
vResult = _mm_or_ps(vTemp1,vTemp2);
return vResult;
}
//-------------------------------------------------------------------------------------
// Vector4
//-------------------------------------------------------------------------------------
// 4D dot product; dp mask 0xff multiplies all four lanes and broadcasts the sum.
inline XMVECTOR XM_CALLCONV XMVector4Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
return _mm_dp_ps( V1, V2, 0xff );
}
// Squared 4D length of V, replicated into all lanes.
inline XMVECTOR XM_CALLCONV XMVector4LengthSq( FXMVECTOR V )
{
return SSE4::XMVector4Dot(V, V);
}
// Fast, low-precision estimate of 1/||V|| (4D) via the hardware rsqrt approximation.
inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLengthEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff );
return _mm_rsqrt_ps( vTemp );
}
// Full-precision 1/||V|| (4D).
inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLength( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff );
XMVECTOR vLengthSq = _mm_sqrt_ps( vTemp );
return _mm_div_ps( g_XMOne, vLengthSq );
}
// Estimated 4D length of V (full sqrt here; kept for API parity).
inline XMVECTOR XM_CALLCONV XMVector4LengthEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff );
return _mm_sqrt_ps( vTemp );
}
// 4D length of V, replicated into all lanes.
inline XMVECTOR XM_CALLCONV XMVector4Length( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff );
return _mm_sqrt_ps( vTemp );
}
// Fast 4D normalize via rsqrt; no zero- or infinite-length safeguards.
inline XMVECTOR XM_CALLCONV XMVector4NormalizeEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff );
XMVECTOR vResult = _mm_rsqrt_ps( vTemp );
return _mm_mul_ps(vResult, V);
}
// 4D normalize: returns zero for zero-length input and QNaN for infinite-length input.
inline XMVECTOR XM_CALLCONV XMVector4Normalize( FXMVECTOR V )
{
XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0xff );
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Create zero with a single instruction
XMVECTOR vZeroMask = _mm_setzero_ps();
// Test for a divide by zero (Must be FP to detect -0.0)
vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
// Failsafe on zero (Or epsilon) length vectors
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
// Divide to perform the normalization
vResult = _mm_div_ps(V,vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult,vZeroMask);
// Select qnan or result based on infinite length
XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
vResult = _mm_or_ps(vTemp1,vTemp2);
return vResult;
}
//-------------------------------------------------------------------------------------
// Plane
//-------------------------------------------------------------------------------------
// Fast plane normalize via rsqrt of the xyz-normal length; scales all four components.
inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst( FXMVECTOR P )
{
XMVECTOR vTemp = _mm_dp_ps( P, P, 0x7f );
XMVECTOR vResult = _mm_rsqrt_ps( vTemp );
return _mm_mul_ps(vResult, P);
}
// Normalizes the plane so its xyz normal has unit length; d is scaled to match.
// Infinite-length input yields zero.
inline XMVECTOR XM_CALLCONV XMPlaneNormalize( FXMVECTOR P )
{
XMVECTOR vLengthSq = _mm_dp_ps( P, P, 0x7f );
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Failsafe on zero (Or epsilon) length planes
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
// Divide to perform the normalization
vResult = _mm_div_ps(P,vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult,vLengthSq);
return vResult;
}
}; // namespace SSE4
//-------------------------------------------------------------------------------------
// DirectXMathSSE4.h -- SSE4.1 extensions for SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error SSE4 not supported on ARM platform
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <smmintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
// SSE4.1-optimized replacements for a subset of DirectXMath routines.
// Callers should first confirm support with XMVerifySSE4Support().
namespace SSE4
{
// Returns true if the CPU supports the SSE4.1 instruction set.
inline bool XMVerifySSE4Support()
{
// Should return true on AMD Bulldozer, Intel Core 2 ("Penryn"), and Intel Core i7 ("Nehalem") or later processors
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
// CPUID function 0 reports the highest supported standard function number.
__cpuid( CPUInfo, 0 );
if ( CPUInfo[0] < 1 )
return false;
// CPUID function 1 returns the feature flags; ECX bit 19 (0x80000) indicates SSE4.1.
__cpuid(CPUInfo, 1 );
// We only check for SSE4.1 instruction set. SSE4.2 instructions are not used.
return ( (CPUInfo[2] & 0x80000) == 0x80000 );
}
//-------------------------------------------------------------------------------------
// Vector
//-------------------------------------------------------------------------------------
// Stores the Y component of V into *y. _mm_extract_ps yields the lane's raw
// bit pattern as an int, so it is written through an int pointer to keep the
// float bits unchanged. NOTE(review): this is the conventional MSVC idiom,
// but the cast is type punning - confirm it is acceptable for all targeted
// compilers (strict aliasing).
inline void XM_CALLCONV XMVectorGetYPtr(_Out_ float *y, _In_ FXMVECTOR V)
{
assert( y != nullptr );
*((int*)y) = _mm_extract_ps( V, 1 );
}
// Stores the Z component of V into *z (see XMVectorGetYPtr for the bit-pattern idiom).
inline void XM_CALLCONV XMVectorGetZPtr(_Out_ float *z, _In_ FXMVECTOR V)
{
assert( z != nullptr );
*((int*)z) = _mm_extract_ps( V, 2 );
}
// Stores the W component of V into *w (see XMVectorGetYPtr for the bit-pattern idiom).
inline void XM_CALLCONV XMVectorGetWPtr(_Out_ float *w, _In_ FXMVECTOR V)
{
assert( w != nullptr );
*((int*)w) = _mm_extract_ps( V, 3 );
}
// Returns the Y component of V reinterpreted as a 32-bit unsigned integer.
inline uint32_t XM_CALLCONV XMVectorGetIntY(FXMVECTOR V)
{
__m128i V1 = _mm_castps_si128( V );
return static_cast<uint32_t>( _mm_extract_epi32( V1, 1 ) );
}
// Returns the Z component of V reinterpreted as a 32-bit unsigned integer.
inline uint32_t XM_CALLCONV XMVectorGetIntZ(FXMVECTOR V)
{
__m128i V1 = _mm_castps_si128( V );
return static_cast<uint32_t>( _mm_extract_epi32( V1, 2 ) );
}
// Returns the W component of V reinterpreted as a 32-bit unsigned integer.
inline uint32_t XM_CALLCONV XMVectorGetIntW(FXMVECTOR V)
{
__m128i V1 = _mm_castps_si128( V );
return static_cast<uint32_t>( _mm_extract_epi32( V1, 3 ) );
}
// Writes the Y component of V, reinterpreted as a 32-bit unsigned integer, to *y.
inline void XM_CALLCONV XMVectorGetIntYPtr(_Out_ uint32_t *y, _In_ FXMVECTOR V)
{
assert( y != nullptr );
__m128i V1 = _mm_castps_si128( V );
*y = static_cast<uint32_t>( _mm_extract_epi32( V1, 1 ) );
}
// Writes the Z component of V, reinterpreted as a 32-bit unsigned integer, to *z.
inline void XM_CALLCONV XMVectorGetIntZPtr(_Out_ uint32_t *z, _In_ FXMVECTOR V)
{
assert( z != nullptr );
__m128i V1 = _mm_castps_si128( V );
*z = static_cast<uint32_t>( _mm_extract_epi32( V1, 2 ) );
}
// Writes the W component of V, reinterpreted as a 32-bit unsigned integer, to *w.
inline void XM_CALLCONV XMVectorGetIntWPtr(_Out_ uint32_t *w, _In_ FXMVECTOR V)
{
assert( w != nullptr );
__m128i V1 = _mm_castps_si128( V );
*w = static_cast<uint32_t>( _mm_extract_epi32( V1, 3 ) );
}
// Returns V with its Y component replaced by y; other components unchanged.
inline XMVECTOR XM_CALLCONV XMVectorSetY(FXMVECTOR V, float y)
{
XMVECTOR vResult = _mm_set_ss(y);
// _mm_insert_ps control 0x10: copy source lane 0 into destination lane 1.
vResult = _mm_insert_ps( V, vResult, 0x10 );
return vResult;
}
// Returns V with its Z component replaced by z; other components unchanged.
inline XMVECTOR XM_CALLCONV XMVectorSetZ(FXMVECTOR V, float z)
{
XMVECTOR vResult = _mm_set_ss(z);
// _mm_insert_ps control 0x20: copy source lane 0 into destination lane 2.
vResult = _mm_insert_ps( V, vResult, 0x20 );
return vResult;
}
// Returns V with its W component replaced by w; other components unchanged.
inline XMVECTOR XM_CALLCONV XMVectorSetW(FXMVECTOR V, float w)
{
XMVECTOR vResult = _mm_set_ss(w);
// _mm_insert_ps control 0x30: copy source lane 0 into destination lane 3.
vResult = _mm_insert_ps( V, vResult, 0x30 );
return vResult;
}
// Returns V with its Y component's bits replaced by the integer y.
inline XMVECTOR XM_CALLCONV XMVectorSetIntY(FXMVECTOR V, uint32_t y)
{
__m128i vResult = _mm_castps_si128( V );
vResult = _mm_insert_epi32( vResult, static_cast<int>(y), 1 );
return _mm_castsi128_ps( vResult );
}
// Returns V with its Z component's bits replaced by the integer z.
inline XMVECTOR XM_CALLCONV XMVectorSetIntZ(FXMVECTOR V, uint32_t z)
{
__m128i vResult = _mm_castps_si128( V );
vResult = _mm_insert_epi32( vResult, static_cast<int>(z), 2 );
return _mm_castsi128_ps( vResult );
}
// Returns V with its W component's bits replaced by the integer w.
inline XMVECTOR XM_CALLCONV XMVectorSetIntW(FXMVECTOR V, uint32_t w)
{
__m128i vResult = _mm_castps_si128( V );
vResult = _mm_insert_epi32( vResult, static_cast<int>(w), 3 );
return _mm_castsi128_ps( vResult );
}
// Rounds each component to the nearest integer (round-to-nearest-even), suppressing exceptions.
inline XMVECTOR XM_CALLCONV XMVectorRound( FXMVECTOR V )
{
return _mm_round_ps( V, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
}
// Rounds each component toward zero, suppressing exceptions.
inline XMVECTOR XM_CALLCONV XMVectorTruncate( FXMVECTOR V )
{
return _mm_round_ps( V, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC );
}
// Rounds each component toward negative infinity.
inline XMVECTOR XM_CALLCONV XMVectorFloor( FXMVECTOR V )
{
return _mm_floor_ps( V );
}
// Rounds each component toward positive infinity.
inline XMVECTOR XM_CALLCONV XMVectorCeiling( FXMVECTOR V )
{
return _mm_ceil_ps( V );
}
//-------------------------------------------------------------------------------------
// Vector2
//-------------------------------------------------------------------------------------
// 2D dot product; dp mask 0x3f multiplies only x and y and broadcasts the sum to all lanes.
inline XMVECTOR XM_CALLCONV XMVector2Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
return _mm_dp_ps( V1, V2, 0x3f );
}
// Squared 2D length of V, replicated into all lanes.
inline XMVECTOR XM_CALLCONV XMVector2LengthSq( FXMVECTOR V )
{
return SSE4::XMVector2Dot(V, V);
}
// Fast, low-precision estimate of 1/||V|| (2D) via the hardware rsqrt approximation.
inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLengthEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f );
return _mm_rsqrt_ps( vTemp );
}
// Full-precision 1/||V|| (2D).
inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLength( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f );
XMVECTOR vLengthSq = _mm_sqrt_ps( vTemp );
return _mm_div_ps( g_XMOne, vLengthSq );
}
// Estimated 2D length of V (full sqrt here; kept for API parity).
inline XMVECTOR XM_CALLCONV XMVector2LengthEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f );
return _mm_sqrt_ps( vTemp );
}
// 2D length of V, replicated into all lanes.
inline XMVECTOR XM_CALLCONV XMVector2Length( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f );
return _mm_sqrt_ps( vTemp );
}
// Fast 2D normalize via rsqrt; no zero- or infinite-length safeguards.
inline XMVECTOR XM_CALLCONV XMVector2NormalizeEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f );
XMVECTOR vResult = _mm_rsqrt_ps( vTemp );
return _mm_mul_ps(vResult, V);
}
// 2D normalize: returns zero for zero-length input and QNaN for infinite-length input.
inline XMVECTOR XM_CALLCONV XMVector2Normalize( FXMVECTOR V )
{
XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x3f );
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Create zero with a single instruction
XMVECTOR vZeroMask = _mm_setzero_ps();
// Test for a divide by zero (Must be FP to detect -0.0)
vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
// Failsafe on zero (Or epsilon) length vectors
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
// Divide to perform the normalization
vResult = _mm_div_ps(V,vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult,vZeroMask);
// Select qnan or result based on infinite length
XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
vResult = _mm_or_ps(vTemp1,vTemp2);
return vResult;
}
//-------------------------------------------------------------------------------------
// Vector3
//-------------------------------------------------------------------------------------
// 3D dot product; dp mask 0x7f multiplies x, y, z and broadcasts the sum to all lanes.
inline XMVECTOR XM_CALLCONV XMVector3Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
return _mm_dp_ps( V1, V2, 0x7f );
}
// Squared 3D length of V, replicated into all lanes.
inline XMVECTOR XM_CALLCONV XMVector3LengthSq( FXMVECTOR V )
{
return SSE4::XMVector3Dot(V, V);
}
// Fast, low-precision estimate of 1/||V|| (3D) via the hardware rsqrt approximation.
inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLengthEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f );
return _mm_rsqrt_ps( vTemp );
}
// Full-precision 1/||V|| (3D).
inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLength( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f );
XMVECTOR vLengthSq = _mm_sqrt_ps( vTemp );
return _mm_div_ps( g_XMOne, vLengthSq );
}
// Estimated 3D length of V (full sqrt here; kept for API parity).
inline XMVECTOR XM_CALLCONV XMVector3LengthEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f );
return _mm_sqrt_ps( vTemp );
}
// 3D length of V, replicated into all lanes.
inline XMVECTOR XM_CALLCONV XMVector3Length( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f );
return _mm_sqrt_ps( vTemp );
}
// Fast 3D normalize via rsqrt; no zero- or infinite-length safeguards.
inline XMVECTOR XM_CALLCONV XMVector3NormalizeEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f );
XMVECTOR vResult = _mm_rsqrt_ps( vTemp );
return _mm_mul_ps(vResult, V);
}
// 3D normalize: returns zero for zero-length input and QNaN for infinite-length input.
inline XMVECTOR XM_CALLCONV XMVector3Normalize( FXMVECTOR V )
{
XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x7f );
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Create zero with a single instruction
XMVECTOR vZeroMask = _mm_setzero_ps();
// Test for a divide by zero (Must be FP to detect -0.0)
vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
// Failsafe on zero (Or epsilon) length vectors
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
// Divide to perform the normalization
vResult = _mm_div_ps(V,vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult,vZeroMask);
// Select qnan or result based on infinite length
XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
vResult = _mm_or_ps(vTemp1,vTemp2);
return vResult;
}
//-------------------------------------------------------------------------------------
// Vector4
//-------------------------------------------------------------------------------------
// 4D dot product; dp mask 0xff multiplies all four lanes and broadcasts the sum.
inline XMVECTOR XM_CALLCONV XMVector4Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
return _mm_dp_ps( V1, V2, 0xff );
}
// Squared 4D length of V, replicated into all lanes.
inline XMVECTOR XM_CALLCONV XMVector4LengthSq( FXMVECTOR V )
{
return SSE4::XMVector4Dot(V, V);
}
// Fast, low-precision estimate of 1/||V|| (4D) via the hardware rsqrt approximation.
inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLengthEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff );
return _mm_rsqrt_ps( vTemp );
}
// Full-precision 1/||V|| (4D).
inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLength( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff );
XMVECTOR vLengthSq = _mm_sqrt_ps( vTemp );
return _mm_div_ps( g_XMOne, vLengthSq );
}
// Estimated 4D length of V (full sqrt here; kept for API parity).
inline XMVECTOR XM_CALLCONV XMVector4LengthEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff );
return _mm_sqrt_ps( vTemp );
}
// 4D length of V, replicated into all lanes.
inline XMVECTOR XM_CALLCONV XMVector4Length( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff );
return _mm_sqrt_ps( vTemp );
}
// Fast 4D normalize via rsqrt; no zero- or infinite-length safeguards.
inline XMVECTOR XM_CALLCONV XMVector4NormalizeEst( FXMVECTOR V )
{
XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff );
XMVECTOR vResult = _mm_rsqrt_ps( vTemp );
return _mm_mul_ps(vResult, V);
}
// 4D normalize: returns zero for zero-length input and QNaN for infinite-length input.
inline XMVECTOR XM_CALLCONV XMVector4Normalize( FXMVECTOR V )
{
XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0xff );
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Create zero with a single instruction
XMVECTOR vZeroMask = _mm_setzero_ps();
// Test for a divide by zero (Must be FP to detect -0.0)
vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
// Failsafe on zero (Or epsilon) length vectors
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
// Divide to perform the normalization
vResult = _mm_div_ps(V,vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult,vZeroMask);
// Select qnan or result based on infinite length
XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
vResult = _mm_or_ps(vTemp1,vTemp2);
return vResult;
}
//-------------------------------------------------------------------------------------
// Plane
//-------------------------------------------------------------------------------------
// Fast plane normalize via rsqrt of the xyz-normal length; scales all four components.
inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst( FXMVECTOR P )
{
XMVECTOR vTemp = _mm_dp_ps( P, P, 0x7f );
XMVECTOR vResult = _mm_rsqrt_ps( vTemp );
return _mm_mul_ps(vResult, P);
}
// Normalizes the plane so its xyz normal has unit length; d is scaled to match.
// Infinite-length input yields zero.
inline XMVECTOR XM_CALLCONV XMPlaneNormalize( FXMVECTOR P )
{
XMVECTOR vLengthSq = _mm_dp_ps( P, P, 0x7f );
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Failsafe on zero (Or epsilon) length planes
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
// Divide to perform the normalization
vResult = _mm_div_ps(P,vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult,vLengthSq);
return vResult;
}
}; // namespace SSE4
}; // namespace DirectX;

View File

@ -1,341 +1,341 @@
//-------------------------------------------------------------------------------------
// DirectXCollision.h -- C++ Collision Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#include "DirectXMath.h"
namespace DirectX
{
// Describes how completely one bounding volume contains another.
enum ContainmentType
{
DISJOINT = 0, // The volumes do not overlap at all.
INTERSECTS = 1, // The volumes partially overlap.
CONTAINS = 2, // The tested volume is fully contained.
};
// Describes which side of a plane a volume lies on.
enum PlaneIntersectionType
{
FRONT = 0, // Entirely on the positive side of the plane.
INTERSECTING = 1, // Straddles the plane.
BACK = 2, // Entirely on the negative side of the plane.
};
struct BoundingBox;
struct BoundingOrientedBox;
struct BoundingFrustum;
#pragma warning(push)
#pragma warning(disable:4324 4820)
// C4324: alignment padding warnings
// C4820: Off by default noise
//-------------------------------------------------------------------------------------
// Bounding sphere
//-------------------------------------------------------------------------------------
// Bounding volume defined by a center point and a radius.
struct BoundingSphere
{
XMFLOAT3 Center; // Center of the sphere.
float Radius; // Radius of the sphere.
// Creators
// Default-constructs a unit sphere at the origin.
BoundingSphere() : Center(0,0,0), Radius( 1.f ) {}
XM_CONSTEXPR BoundingSphere( _In_ const XMFLOAT3& center, _In_ float radius )
: Center(center), Radius(radius) {}
BoundingSphere( _In_ const BoundingSphere& sp )
: Center(sp.Center), Radius(sp.Radius) {}
// Methods
BoundingSphere& operator=( _In_ const BoundingSphere& sp ) { Center = sp.Center; Radius = sp.Radius; return *this; }
void XM_CALLCONV Transform( _Out_ BoundingSphere& Out, _In_ FXMMATRIX M ) const;
void XM_CALLCONV Transform( _Out_ BoundingSphere& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation ) const;
// Transform the sphere
// Containment tests against a point, a triangle, and other bounding volumes.
ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR Point ) const;
ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
ContainmentType Contains( _In_ const BoundingSphere& sh ) const;
ContainmentType Contains( _In_ const BoundingBox& box ) const;
ContainmentType Contains( _In_ const BoundingOrientedBox& box ) const;
ContainmentType Contains( _In_ const BoundingFrustum& fr ) const;
// Intersection tests against other bounding volumes.
bool Intersects( _In_ const BoundingSphere& sh ) const;
bool Intersects( _In_ const BoundingBox& box ) const;
bool Intersects( _In_ const BoundingOrientedBox& box ) const;
bool Intersects( _In_ const BoundingFrustum& fr ) const;
bool XM_CALLCONV Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
// Triangle-sphere test
PlaneIntersectionType XM_CALLCONV Intersects( _In_ FXMVECTOR Plane ) const;
// Plane-sphere test
bool XM_CALLCONV Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;
// Ray-sphere test
ContainmentType XM_CALLCONV ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
_In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5 ) const;
// Test sphere against six planes (see BoundingFrustum::GetPlanes)
// Static methods
// Factory helpers that build a sphere from other geometry.
static void CreateMerged( _Out_ BoundingSphere& Out, _In_ const BoundingSphere& S1, _In_ const BoundingSphere& S2 );
static void CreateFromBoundingBox( _Out_ BoundingSphere& Out, _In_ const BoundingBox& box );
static void CreateFromBoundingBox( _Out_ BoundingSphere& Out, _In_ const BoundingOrientedBox& box );
static void CreateFromPoints( _Out_ BoundingSphere& Out, _In_ size_t Count,
_In_reads_bytes_(sizeof(XMFLOAT3)+Stride*(Count-1)) const XMFLOAT3* pPoints, _In_ size_t Stride );
static void CreateFromFrustum( _Out_ BoundingSphere& Out, _In_ const BoundingFrustum& fr );
};
//-------------------------------------------------------------------------------------
// Axis-aligned bounding box
//-------------------------------------------------------------------------------------
// An axis-aligned box bounding volume: a center point plus half-extents.
struct BoundingBox
{
    static const size_t CORNER_COUNT = 8;

    XMFLOAT3 Center;            // Center of the box.
    XMFLOAT3 Extents;           // Distance from the center to each side.

    // Creators
    BoundingBox() : Center( 0, 0, 0 ), Extents( 1.f, 1.f, 1.f ) {}
    XM_CONSTEXPR BoundingBox( _In_ const XMFLOAT3& center, _In_ const XMFLOAT3& extents )
        : Center(center), Extents(extents) {}

    // Rule of Zero: members are trivially copyable, so defaulted copy/move
    // operations are correct; the previous hand-written copy ctor/assign
    // suppressed the implicit moves and trivial copyability.
    BoundingBox( const BoundingBox& ) = default;
    BoundingBox& operator=( const BoundingBox& ) = default;
    BoundingBox( BoundingBox&& ) = default;
    BoundingBox& operator=( BoundingBox&& ) = default;

    // Transform the box by a matrix, or by scale/rotation/translation.
    void XM_CALLCONV Transform( _Out_ BoundingBox& Out, _In_ FXMMATRIX M ) const;
    void XM_CALLCONV Transform( _Out_ BoundingBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation ) const;

    // Gets the 8 corners of the box.
    void GetCorners( _Out_writes_(8) XMFLOAT3* Corners ) const;

    // Containment queries: point, triangle, and the other bounding volumes.
    ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR Point ) const;
    ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
    ContainmentType Contains( _In_ const BoundingSphere& sh ) const;
    ContainmentType Contains( _In_ const BoundingBox& box ) const;
    ContainmentType Contains( _In_ const BoundingOrientedBox& box ) const;
    ContainmentType Contains( _In_ const BoundingFrustum& fr ) const;

    // Boolean intersection tests against the other bounding volumes.
    bool Intersects( _In_ const BoundingSphere& sh ) const;
    bool Intersects( _In_ const BoundingBox& box ) const;
    bool Intersects( _In_ const BoundingOrientedBox& box ) const;
    bool Intersects( _In_ const BoundingFrustum& fr ) const;

    // Triangle-box test.
    bool XM_CALLCONV Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;

    // Plane-box classification.
    PlaneIntersectionType XM_CALLCONV Intersects( _In_ FXMVECTOR Plane ) const;

    // Ray-box test; on a hit, Dist receives the distance along the ray.
    bool XM_CALLCONV Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;

    // Test box against six planes (see BoundingFrustum::GetPlanes).
    ContainmentType XM_CALLCONV ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
                                             _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5 ) const;

    // Static factory methods.
    static void CreateMerged( _Out_ BoundingBox& Out, _In_ const BoundingBox& b1, _In_ const BoundingBox& b2 );
    static void CreateFromSphere( _Out_ BoundingBox& Out, _In_ const BoundingSphere& sh );
    static void XM_CALLCONV CreateFromPoints( _Out_ BoundingBox& Out, _In_ FXMVECTOR pt1, _In_ FXMVECTOR pt2 );
    static void CreateFromPoints( _Out_ BoundingBox& Out, _In_ size_t Count,
                                  _In_reads_bytes_(sizeof(XMFLOAT3)+Stride*(Count-1)) const XMFLOAT3* pPoints, _In_ size_t Stride );
};
//-------------------------------------------------------------------------------------
// Oriented bounding box
//-------------------------------------------------------------------------------------
// An oriented box bounding volume: center, half-extents, and an
// orientation quaternion (box space -> world space).
struct BoundingOrientedBox
{
    static const size_t CORNER_COUNT = 8;

    XMFLOAT3 Center;            // Center of the box.
    XMFLOAT3 Extents;           // Distance from the center to each side.
    XMFLOAT4 Orientation;       // Unit quaternion representing rotation (box -> world).

    // Creators
    BoundingOrientedBox() : Center( 0, 0, 0 ), Extents( 1.f, 1.f, 1.f ), Orientation( 0, 0, 0, 1.f ) {}
    XM_CONSTEXPR BoundingOrientedBox( _In_ const XMFLOAT3& _Center, _In_ const XMFLOAT3& _Extents, _In_ const XMFLOAT4& _Orientation )
        : Center(_Center), Extents(_Extents), Orientation(_Orientation) {}

    // Rule of Zero: members are trivially copyable, so defaulted copy/move
    // operations are correct; the previous hand-written copy ctor/assign
    // suppressed the implicit moves and trivial copyability.
    BoundingOrientedBox( const BoundingOrientedBox& ) = default;
    BoundingOrientedBox& operator=( const BoundingOrientedBox& ) = default;
    BoundingOrientedBox( BoundingOrientedBox&& ) = default;
    BoundingOrientedBox& operator=( BoundingOrientedBox&& ) = default;

    // Transform the box by a matrix, or by scale/rotation/translation.
    void XM_CALLCONV Transform( _Out_ BoundingOrientedBox& Out, _In_ FXMMATRIX M ) const;
    void XM_CALLCONV Transform( _Out_ BoundingOrientedBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation ) const;

    // Gets the 8 corners of the box.
    void GetCorners( _Out_writes_(8) XMFLOAT3* Corners ) const;

    // Containment queries: point, triangle, and the other bounding volumes.
    ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR Point ) const;
    ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
    ContainmentType Contains( _In_ const BoundingSphere& sh ) const;
    ContainmentType Contains( _In_ const BoundingBox& box ) const;
    ContainmentType Contains( _In_ const BoundingOrientedBox& box ) const;
    ContainmentType Contains( _In_ const BoundingFrustum& fr ) const;

    // Boolean intersection tests against the other bounding volumes.
    bool Intersects( _In_ const BoundingSphere& sh ) const;
    bool Intersects( _In_ const BoundingBox& box ) const;
    bool Intersects( _In_ const BoundingOrientedBox& box ) const;
    bool Intersects( _In_ const BoundingFrustum& fr ) const;

    // Triangle-OrientedBox test.
    bool XM_CALLCONV Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;

    // Plane-OrientedBox classification.
    PlaneIntersectionType XM_CALLCONV Intersects( _In_ FXMVECTOR Plane ) const;

    // Ray-OrientedBox test; on a hit, Dist receives the distance along the ray.
    bool XM_CALLCONV Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;

    // Test OrientedBox against six planes (see BoundingFrustum::GetPlanes).
    ContainmentType XM_CALLCONV ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
                                             _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5 ) const;

    // Static factory methods.
    static void CreateFromBoundingBox( _Out_ BoundingOrientedBox& Out, _In_ const BoundingBox& box );
    static void CreateFromPoints( _Out_ BoundingOrientedBox& Out, _In_ size_t Count,
                                  _In_reads_bytes_(sizeof(XMFLOAT3)+Stride*(Count-1)) const XMFLOAT3* pPoints, _In_ size_t Stride );
};
//-------------------------------------------------------------------------------------
// Bounding frustum
//-------------------------------------------------------------------------------------
struct BoundingFrustum
{
static const size_t CORNER_COUNT = 8;
XMFLOAT3 Origin; // Origin of the frustum (and projection).
XMFLOAT4 Orientation; // Quaternion representing rotation.
float RightSlope; // Positive X slope (X/Z).
float LeftSlope; // Negative X slope.
float TopSlope; // Positive Y slope (Y/Z).
float BottomSlope; // Negative Y slope.
float Near, Far; // Z of the near plane and far plane.
// Creators
BoundingFrustum() : Origin(0,0,0), Orientation(0,0,0, 1.f), RightSlope( 1.f ), LeftSlope( -1.f ),
TopSlope( 1.f ), BottomSlope( -1.f ), Near(0), Far( 1.f ) {}
XM_CONSTEXPR BoundingFrustum( _In_ const XMFLOAT3& _Origin, _In_ const XMFLOAT4& _Orientation,
_In_ float _RightSlope, _In_ float _LeftSlope, _In_ float _TopSlope, _In_ float _BottomSlope,
_In_ float _Near, _In_ float _Far )
: Origin(_Origin), Orientation(_Orientation),
RightSlope(_RightSlope), LeftSlope(_LeftSlope), TopSlope(_TopSlope), BottomSlope(_BottomSlope),
Near(_Near), Far(_Far) {}
BoundingFrustum( _In_ const BoundingFrustum& fr )
: Origin(fr.Origin), Orientation(fr.Orientation), RightSlope(fr.RightSlope), LeftSlope(fr.LeftSlope),
TopSlope(fr.TopSlope), BottomSlope(fr.BottomSlope), Near(fr.Near), Far(fr.Far) {}
BoundingFrustum( _In_ CXMMATRIX Projection ) { CreateFromMatrix( *this, Projection ); }
// Methods
BoundingFrustum& operator=( _In_ const BoundingFrustum& fr ) { Origin=fr.Origin; Orientation=fr.Orientation;
RightSlope=fr.RightSlope; LeftSlope=fr.LeftSlope;
TopSlope=fr.TopSlope; BottomSlope=fr.BottomSlope;
Near=fr.Near; Far=fr.Far; return *this; }
void XM_CALLCONV Transform( _Out_ BoundingFrustum& Out, _In_ FXMMATRIX M ) const;
void XM_CALLCONV Transform( _Out_ BoundingFrustum& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation ) const;
void GetCorners( _Out_writes_(8) XMFLOAT3* Corners ) const;
// Gets the 8 corners of the frustum
ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR Point ) const;
ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
ContainmentType Contains( _In_ const BoundingSphere& sp ) const;
ContainmentType Contains( _In_ const BoundingBox& box ) const;
ContainmentType Contains( _In_ const BoundingOrientedBox& box ) const;
ContainmentType Contains( _In_ const BoundingFrustum& fr ) const;
// Frustum-Frustum test
bool Intersects( _In_ const BoundingSphere& sh ) const;
bool Intersects( _In_ const BoundingBox& box ) const;
bool Intersects( _In_ const BoundingOrientedBox& box ) const;
bool Intersects( _In_ const BoundingFrustum& fr ) const;
bool XM_CALLCONV Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
// Triangle-Frustum test
PlaneIntersectionType XM_CALLCONV Intersects( _In_ FXMVECTOR Plane ) const;
// Plane-Frustum test
bool XM_CALLCONV Intersects( _In_ FXMVECTOR rayOrigin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;
// Ray-Frustum test
ContainmentType XM_CALLCONV ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
_In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5 ) const;
// Test frustum against six planes (see BoundingFrustum::GetPlanes)
void GetPlanes( _Out_opt_ XMVECTOR* NearPlane, _Out_opt_ XMVECTOR* FarPlane, _Out_opt_ XMVECTOR* RightPlane,
_Out_opt_ XMVECTOR* LeftPlane, _Out_opt_ XMVECTOR* TopPlane, _Out_opt_ XMVECTOR* BottomPlane ) const;
// Create 6 Planes representation of Frustum
// Static methods
static void XM_CALLCONV CreateFromMatrix( _Out_ BoundingFrustum& Out, _In_ FXMMATRIX Projection );
};
//-----------------------------------------------------------------------------
// Triangle intersection testing routines.
//-----------------------------------------------------------------------------
// Triangle intersection testing routines.
namespace TriangleTests
{
    // Ray-Triangle: on a hit, Dist receives the distance along the ray.
    bool XM_CALLCONV Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _In_ FXMVECTOR V0, _In_ GXMVECTOR V1, _In_ HXMVECTOR V2, _Out_ float& Dist );

    // Triangle-Triangle overlap test.
    bool XM_CALLCONV Intersects( _In_ FXMVECTOR A0, _In_ FXMVECTOR A1, _In_ FXMVECTOR A2, _In_ GXMVECTOR B0, _In_ HXMVECTOR B1, _In_ HXMVECTOR B2 );

    // Plane-Triangle classification.
    PlaneIntersectionType XM_CALLCONV Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2, _In_ GXMVECTOR Plane );

    // Test a triangle against six planes at once (see BoundingFrustum::GetPlanes).
    ContainmentType XM_CALLCONV ContainedBy( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2,
                                             _In_ GXMVECTOR Plane0, _In_ HXMVECTOR Plane1, _In_ HXMVECTOR Plane2,
                                             _In_ CXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5 );
} // namespace TriangleTests (no ';' — a namespace is not a declaration needing one)
#pragma warning(pop)
/****************************************************************************
*
* Implementation
*
****************************************************************************/
#pragma warning(push)
#pragma warning(disable : 4068 4365 4616 6001)
// C4068/4616: ignore unknown pragmas
// C4365: Off by default noise
// C6001: False positives
#pragma prefast(push)
#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
#include "DirectXCollision.inl"
#pragma prefast(pop)
#pragma warning(pop)
}; // namespace DirectX
//-------------------------------------------------------------------------------------
// DirectXCollision.h -- C++ Collision Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#include "DirectXMath.h"
namespace DirectX
{
// Result of a volume/volume or volume/primitive containment query
// (returned by the Contains() members of the bounding types below).
enum ContainmentType
{
    DISJOINT = 0,       // The two objects do not overlap at all.
    INTERSECTS = 1,     // The objects partially overlap.
    CONTAINS = 2,       // The queried object is fully inside this volume.
};
// Result of a plane/volume or plane/triangle classification
// (returned by the plane overloads of Intersects() below).
enum PlaneIntersectionType
{
    FRONT = 0,          // Entirely on the positive (normal) side of the plane.
    INTERSECTING = 1,   // Straddles the plane.
    BACK = 2,           // Entirely on the negative side of the plane.
};
struct BoundingBox;
struct BoundingOrientedBox;
struct BoundingFrustum;
#pragma warning(push)
#pragma warning(disable:4324 4820)
// C4324: alignment padding warnings
// C4820: Off by default noise
//-------------------------------------------------------------------------------------
// Bounding sphere
//-------------------------------------------------------------------------------------
struct BoundingSphere
{
XMFLOAT3 Center; // Center of the sphere.
float Radius; // Radius of the sphere.
// Creators
BoundingSphere() : Center(0,0,0), Radius( 1.f ) {}
XM_CONSTEXPR BoundingSphere( _In_ const XMFLOAT3& center, _In_ float radius )
: Center(center), Radius(radius) {}
BoundingSphere( _In_ const BoundingSphere& sp )
: Center(sp.Center), Radius(sp.Radius) {}
// Methods
BoundingSphere& operator=( _In_ const BoundingSphere& sp ) { Center = sp.Center; Radius = sp.Radius; return *this; }
void XM_CALLCONV Transform( _Out_ BoundingSphere& Out, _In_ FXMMATRIX M ) const;
void XM_CALLCONV Transform( _Out_ BoundingSphere& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation ) const;
// Transform the sphere
ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR Point ) const;
ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
ContainmentType Contains( _In_ const BoundingSphere& sh ) const;
ContainmentType Contains( _In_ const BoundingBox& box ) const;
ContainmentType Contains( _In_ const BoundingOrientedBox& box ) const;
ContainmentType Contains( _In_ const BoundingFrustum& fr ) const;
bool Intersects( _In_ const BoundingSphere& sh ) const;
bool Intersects( _In_ const BoundingBox& box ) const;
bool Intersects( _In_ const BoundingOrientedBox& box ) const;
bool Intersects( _In_ const BoundingFrustum& fr ) const;
bool XM_CALLCONV Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
// Triangle-sphere test
PlaneIntersectionType XM_CALLCONV Intersects( _In_ FXMVECTOR Plane ) const;
// Plane-sphere test
bool XM_CALLCONV Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;
// Ray-sphere test
ContainmentType XM_CALLCONV ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
_In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5 ) const;
// Test sphere against six planes (see BoundingFrustum::GetPlanes)
// Static methods
static void CreateMerged( _Out_ BoundingSphere& Out, _In_ const BoundingSphere& S1, _In_ const BoundingSphere& S2 );
static void CreateFromBoundingBox( _Out_ BoundingSphere& Out, _In_ const BoundingBox& box );
static void CreateFromBoundingBox( _Out_ BoundingSphere& Out, _In_ const BoundingOrientedBox& box );
static void CreateFromPoints( _Out_ BoundingSphere& Out, _In_ size_t Count,
_In_reads_bytes_(sizeof(XMFLOAT3)+Stride*(Count-1)) const XMFLOAT3* pPoints, _In_ size_t Stride );
static void CreateFromFrustum( _Out_ BoundingSphere& Out, _In_ const BoundingFrustum& fr );
};
//-------------------------------------------------------------------------------------
// Axis-aligned bounding box
//-------------------------------------------------------------------------------------
// An axis-aligned box bounding volume: a center point plus half-extents.
struct BoundingBox
{
    static const size_t CORNER_COUNT = 8;

    XMFLOAT3 Center;            // Center of the box.
    XMFLOAT3 Extents;           // Distance from the center to each side.

    // Creators
    BoundingBox() : Center( 0, 0, 0 ), Extents( 1.f, 1.f, 1.f ) {}
    XM_CONSTEXPR BoundingBox( _In_ const XMFLOAT3& center, _In_ const XMFLOAT3& extents )
        : Center(center), Extents(extents) {}

    // Rule of Zero: members are trivially copyable, so defaulted copy/move
    // operations are correct; the previous hand-written copy ctor/assign
    // suppressed the implicit moves and trivial copyability.
    BoundingBox( const BoundingBox& ) = default;
    BoundingBox& operator=( const BoundingBox& ) = default;
    BoundingBox( BoundingBox&& ) = default;
    BoundingBox& operator=( BoundingBox&& ) = default;

    // Transform the box by a matrix, or by scale/rotation/translation.
    void XM_CALLCONV Transform( _Out_ BoundingBox& Out, _In_ FXMMATRIX M ) const;
    void XM_CALLCONV Transform( _Out_ BoundingBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation ) const;

    // Gets the 8 corners of the box.
    void GetCorners( _Out_writes_(8) XMFLOAT3* Corners ) const;

    // Containment queries: point, triangle, and the other bounding volumes.
    ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR Point ) const;
    ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
    ContainmentType Contains( _In_ const BoundingSphere& sh ) const;
    ContainmentType Contains( _In_ const BoundingBox& box ) const;
    ContainmentType Contains( _In_ const BoundingOrientedBox& box ) const;
    ContainmentType Contains( _In_ const BoundingFrustum& fr ) const;

    // Boolean intersection tests against the other bounding volumes.
    bool Intersects( _In_ const BoundingSphere& sh ) const;
    bool Intersects( _In_ const BoundingBox& box ) const;
    bool Intersects( _In_ const BoundingOrientedBox& box ) const;
    bool Intersects( _In_ const BoundingFrustum& fr ) const;

    // Triangle-box test.
    bool XM_CALLCONV Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;

    // Plane-box classification.
    PlaneIntersectionType XM_CALLCONV Intersects( _In_ FXMVECTOR Plane ) const;

    // Ray-box test; on a hit, Dist receives the distance along the ray.
    bool XM_CALLCONV Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;

    // Test box against six planes (see BoundingFrustum::GetPlanes).
    ContainmentType XM_CALLCONV ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
                                             _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5 ) const;

    // Static factory methods.
    static void CreateMerged( _Out_ BoundingBox& Out, _In_ const BoundingBox& b1, _In_ const BoundingBox& b2 );
    static void CreateFromSphere( _Out_ BoundingBox& Out, _In_ const BoundingSphere& sh );
    static void XM_CALLCONV CreateFromPoints( _Out_ BoundingBox& Out, _In_ FXMVECTOR pt1, _In_ FXMVECTOR pt2 );
    static void CreateFromPoints( _Out_ BoundingBox& Out, _In_ size_t Count,
                                  _In_reads_bytes_(sizeof(XMFLOAT3)+Stride*(Count-1)) const XMFLOAT3* pPoints, _In_ size_t Stride );
};
//-------------------------------------------------------------------------------------
// Oriented bounding box
//-------------------------------------------------------------------------------------
// An oriented box bounding volume: center, half-extents, and an
// orientation quaternion (box space -> world space).
struct BoundingOrientedBox
{
    static const size_t CORNER_COUNT = 8;

    XMFLOAT3 Center;            // Center of the box.
    XMFLOAT3 Extents;           // Distance from the center to each side.
    XMFLOAT4 Orientation;       // Unit quaternion representing rotation (box -> world).

    // Creators
    BoundingOrientedBox() : Center( 0, 0, 0 ), Extents( 1.f, 1.f, 1.f ), Orientation( 0, 0, 0, 1.f ) {}
    XM_CONSTEXPR BoundingOrientedBox( _In_ const XMFLOAT3& _Center, _In_ const XMFLOAT3& _Extents, _In_ const XMFLOAT4& _Orientation )
        : Center(_Center), Extents(_Extents), Orientation(_Orientation) {}

    // Rule of Zero: members are trivially copyable, so defaulted copy/move
    // operations are correct; the previous hand-written copy ctor/assign
    // suppressed the implicit moves and trivial copyability.
    BoundingOrientedBox( const BoundingOrientedBox& ) = default;
    BoundingOrientedBox& operator=( const BoundingOrientedBox& ) = default;
    BoundingOrientedBox( BoundingOrientedBox&& ) = default;
    BoundingOrientedBox& operator=( BoundingOrientedBox&& ) = default;

    // Transform the box by a matrix, or by scale/rotation/translation.
    void XM_CALLCONV Transform( _Out_ BoundingOrientedBox& Out, _In_ FXMMATRIX M ) const;
    void XM_CALLCONV Transform( _Out_ BoundingOrientedBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation ) const;

    // Gets the 8 corners of the box.
    void GetCorners( _Out_writes_(8) XMFLOAT3* Corners ) const;

    // Containment queries: point, triangle, and the other bounding volumes.
    ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR Point ) const;
    ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
    ContainmentType Contains( _In_ const BoundingSphere& sh ) const;
    ContainmentType Contains( _In_ const BoundingBox& box ) const;
    ContainmentType Contains( _In_ const BoundingOrientedBox& box ) const;
    ContainmentType Contains( _In_ const BoundingFrustum& fr ) const;

    // Boolean intersection tests against the other bounding volumes.
    bool Intersects( _In_ const BoundingSphere& sh ) const;
    bool Intersects( _In_ const BoundingBox& box ) const;
    bool Intersects( _In_ const BoundingOrientedBox& box ) const;
    bool Intersects( _In_ const BoundingFrustum& fr ) const;

    // Triangle-OrientedBox test.
    bool XM_CALLCONV Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;

    // Plane-OrientedBox classification.
    PlaneIntersectionType XM_CALLCONV Intersects( _In_ FXMVECTOR Plane ) const;

    // Ray-OrientedBox test; on a hit, Dist receives the distance along the ray.
    bool XM_CALLCONV Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;

    // Test OrientedBox against six planes (see BoundingFrustum::GetPlanes).
    ContainmentType XM_CALLCONV ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
                                             _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5 ) const;

    // Static factory methods.
    static void CreateFromBoundingBox( _Out_ BoundingOrientedBox& Out, _In_ const BoundingBox& box );
    static void CreateFromPoints( _Out_ BoundingOrientedBox& Out, _In_ size_t Count,
                                  _In_reads_bytes_(sizeof(XMFLOAT3)+Stride*(Count-1)) const XMFLOAT3* pPoints, _In_ size_t Stride );
};
//-------------------------------------------------------------------------------------
// Bounding frustum
//-------------------------------------------------------------------------------------
struct BoundingFrustum
{
static const size_t CORNER_COUNT = 8;
XMFLOAT3 Origin; // Origin of the frustum (and projection).
XMFLOAT4 Orientation; // Quaternion representing rotation.
float RightSlope; // Positive X slope (X/Z).
float LeftSlope; // Negative X slope.
float TopSlope; // Positive Y slope (Y/Z).
float BottomSlope; // Negative Y slope.
float Near, Far; // Z of the near plane and far plane.
// Creators
BoundingFrustum() : Origin(0,0,0), Orientation(0,0,0, 1.f), RightSlope( 1.f ), LeftSlope( -1.f ),
TopSlope( 1.f ), BottomSlope( -1.f ), Near(0), Far( 1.f ) {}
XM_CONSTEXPR BoundingFrustum( _In_ const XMFLOAT3& _Origin, _In_ const XMFLOAT4& _Orientation,
_In_ float _RightSlope, _In_ float _LeftSlope, _In_ float _TopSlope, _In_ float _BottomSlope,
_In_ float _Near, _In_ float _Far )
: Origin(_Origin), Orientation(_Orientation),
RightSlope(_RightSlope), LeftSlope(_LeftSlope), TopSlope(_TopSlope), BottomSlope(_BottomSlope),
Near(_Near), Far(_Far) {}
BoundingFrustum( _In_ const BoundingFrustum& fr )
: Origin(fr.Origin), Orientation(fr.Orientation), RightSlope(fr.RightSlope), LeftSlope(fr.LeftSlope),
TopSlope(fr.TopSlope), BottomSlope(fr.BottomSlope), Near(fr.Near), Far(fr.Far) {}
BoundingFrustum( _In_ CXMMATRIX Projection ) { CreateFromMatrix( *this, Projection ); }
// Methods
BoundingFrustum& operator=( _In_ const BoundingFrustum& fr ) { Origin=fr.Origin; Orientation=fr.Orientation;
RightSlope=fr.RightSlope; LeftSlope=fr.LeftSlope;
TopSlope=fr.TopSlope; BottomSlope=fr.BottomSlope;
Near=fr.Near; Far=fr.Far; return *this; }
void XM_CALLCONV Transform( _Out_ BoundingFrustum& Out, _In_ FXMMATRIX M ) const;
void XM_CALLCONV Transform( _Out_ BoundingFrustum& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation ) const;
void GetCorners( _Out_writes_(8) XMFLOAT3* Corners ) const;
// Gets the 8 corners of the frustum
ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR Point ) const;
ContainmentType XM_CALLCONV Contains( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
ContainmentType Contains( _In_ const BoundingSphere& sp ) const;
ContainmentType Contains( _In_ const BoundingBox& box ) const;
ContainmentType Contains( _In_ const BoundingOrientedBox& box ) const;
ContainmentType Contains( _In_ const BoundingFrustum& fr ) const;
// Frustum-Frustum test
bool Intersects( _In_ const BoundingSphere& sh ) const;
bool Intersects( _In_ const BoundingBox& box ) const;
bool Intersects( _In_ const BoundingOrientedBox& box ) const;
bool Intersects( _In_ const BoundingFrustum& fr ) const;
bool XM_CALLCONV Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
// Triangle-Frustum test
PlaneIntersectionType XM_CALLCONV Intersects( _In_ FXMVECTOR Plane ) const;
// Plane-Frustum test
bool XM_CALLCONV Intersects( _In_ FXMVECTOR rayOrigin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;
// Ray-Frustum test
ContainmentType XM_CALLCONV ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
_In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5 ) const;
// Test frustum against six planes (see BoundingFrustum::GetPlanes)
void GetPlanes( _Out_opt_ XMVECTOR* NearPlane, _Out_opt_ XMVECTOR* FarPlane, _Out_opt_ XMVECTOR* RightPlane,
_Out_opt_ XMVECTOR* LeftPlane, _Out_opt_ XMVECTOR* TopPlane, _Out_opt_ XMVECTOR* BottomPlane ) const;
// Create 6 Planes representation of Frustum
// Static methods
static void XM_CALLCONV CreateFromMatrix( _Out_ BoundingFrustum& Out, _In_ FXMMATRIX Projection );
};
//-----------------------------------------------------------------------------
// Triangle intersection testing routines.
//-----------------------------------------------------------------------------
// Triangle intersection testing routines.
namespace TriangleTests
{
    // Ray-Triangle: on a hit, Dist receives the distance along the ray.
    bool XM_CALLCONV Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _In_ FXMVECTOR V0, _In_ GXMVECTOR V1, _In_ HXMVECTOR V2, _Out_ float& Dist );

    // Triangle-Triangle overlap test.
    bool XM_CALLCONV Intersects( _In_ FXMVECTOR A0, _In_ FXMVECTOR A1, _In_ FXMVECTOR A2, _In_ GXMVECTOR B0, _In_ HXMVECTOR B1, _In_ HXMVECTOR B2 );

    // Plane-Triangle classification.
    PlaneIntersectionType XM_CALLCONV Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2, _In_ GXMVECTOR Plane );

    // Test a triangle against six planes at once (see BoundingFrustum::GetPlanes).
    ContainmentType XM_CALLCONV ContainedBy( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2,
                                             _In_ GXMVECTOR Plane0, _In_ HXMVECTOR Plane1, _In_ HXMVECTOR Plane2,
                                             _In_ CXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5 );
} // namespace TriangleTests (no ';' — a namespace is not a declaration needing one)
#pragma warning(pop)
/****************************************************************************
*
* Implementation
*
****************************************************************************/
#pragma warning(push)
#pragma warning(disable : 4068 4365 4616 6001)
// C4068/4616: ignore unknown pragmas
// C4365: Off by default noise
// C6001: False positives
#pragma prefast(push)
#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
#include "DirectXCollision.inl"
#pragma prefast(pop)
#pragma warning(pop)
}; // namespace DirectX

File diff suppressed because it is too large Load Diff

View File

@ -1,169 +1,169 @@
//-------------------------------------------------------------------------------------
// DirectXColors.h -- C++ Color Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#include "DirectXMath.h"
namespace DirectX
{
namespace Colors
{
// Standard colors (Red/Green/Blue/Alpha)
// Each constant is an XMGLOBALCONST XMVECTORF32, so it can be used directly
// wherever an XMVECTOR operand is expected. The component values are the
// classic .NET/web named colors: the 8-bit sRGB channel values divided by
// 255, stored as floats in [0, 1] (e.g. AliceBlue = 240/255, 248/255, 255/255).
// Alpha is 1.0 for every entry except Transparent, which is fully clear black.
XMGLOBALCONST XMVECTORF32 AliceBlue = {0.941176534f, 0.972549081f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 AntiqueWhite = {0.980392218f, 0.921568692f, 0.843137324f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Aqua = {0.000000000f, 1.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Aquamarine = {0.498039246f, 1.000000000f, 0.831372619f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Azure = {0.941176534f, 1.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Beige = {0.960784376f, 0.960784376f, 0.862745166f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Bisque = {1.000000000f, 0.894117713f, 0.768627524f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Black = {0.000000000f, 0.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 BlanchedAlmond = {1.000000000f, 0.921568692f, 0.803921640f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Blue = {0.000000000f, 0.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 BlueViolet = {0.541176498f, 0.168627456f, 0.886274576f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Brown = {0.647058845f, 0.164705887f, 0.164705887f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 BurlyWood = {0.870588303f, 0.721568644f, 0.529411793f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 CadetBlue = {0.372549027f, 0.619607866f, 0.627451003f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Chartreuse = {0.498039246f, 1.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Chocolate = {0.823529482f, 0.411764741f, 0.117647067f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Coral = {1.000000000f, 0.498039246f, 0.313725501f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 CornflowerBlue = {0.392156899f, 0.584313750f, 0.929411829f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Cornsilk = {1.000000000f, 0.972549081f, 0.862745166f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Crimson = {0.862745166f, 0.078431375f, 0.235294133f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Cyan = {0.000000000f, 1.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkBlue = {0.000000000f, 0.000000000f, 0.545098066f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkCyan = {0.000000000f, 0.545098066f, 0.545098066f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkGoldenrod = {0.721568644f, 0.525490224f, 0.043137256f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkGray = {0.662745118f, 0.662745118f, 0.662745118f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkGreen = {0.000000000f, 0.392156899f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkKhaki = {0.741176486f, 0.717647076f, 0.419607878f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkMagenta = {0.545098066f, 0.000000000f, 0.545098066f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkOliveGreen = {0.333333343f, 0.419607878f, 0.184313729f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkOrange = {1.000000000f, 0.549019635f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkOrchid = {0.600000024f, 0.196078449f, 0.800000072f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkRed = {0.545098066f, 0.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkSalmon = {0.913725555f, 0.588235319f, 0.478431404f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkSeaGreen = {0.560784340f, 0.737254918f, 0.545098066f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkSlateBlue = {0.282352954f, 0.239215702f, 0.545098066f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkSlateGray = {0.184313729f, 0.309803933f, 0.309803933f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkTurquoise = {0.000000000f, 0.807843208f, 0.819607913f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkViolet = {0.580392182f, 0.000000000f, 0.827451050f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DeepPink = {1.000000000f, 0.078431375f, 0.576470613f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DeepSkyBlue = {0.000000000f, 0.749019623f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DimGray = {0.411764741f, 0.411764741f, 0.411764741f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DodgerBlue = {0.117647067f, 0.564705908f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Firebrick = {0.698039234f, 0.133333340f, 0.133333340f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 FloralWhite = {1.000000000f, 0.980392218f, 0.941176534f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 ForestGreen = {0.133333340f, 0.545098066f, 0.133333340f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Fuchsia = {1.000000000f, 0.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Gainsboro = {0.862745166f, 0.862745166f, 0.862745166f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 GhostWhite = {0.972549081f, 0.972549081f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Gold = {1.000000000f, 0.843137324f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Goldenrod = {0.854902029f, 0.647058845f, 0.125490203f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Gray = {0.501960814f, 0.501960814f, 0.501960814f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Green = {0.000000000f, 0.501960814f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 GreenYellow = {0.678431392f, 1.000000000f, 0.184313729f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Honeydew = {0.941176534f, 1.000000000f, 0.941176534f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 HotPink = {1.000000000f, 0.411764741f, 0.705882370f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 IndianRed = {0.803921640f, 0.360784322f, 0.360784322f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Indigo = {0.294117659f, 0.000000000f, 0.509803951f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Ivory = {1.000000000f, 1.000000000f, 0.941176534f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Khaki = {0.941176534f, 0.901960850f, 0.549019635f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Lavender = {0.901960850f, 0.901960850f, 0.980392218f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LavenderBlush = {1.000000000f, 0.941176534f, 0.960784376f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LawnGreen = {0.486274540f, 0.988235354f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LemonChiffon = {1.000000000f, 0.980392218f, 0.803921640f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightBlue = {0.678431392f, 0.847058892f, 0.901960850f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightCoral = {0.941176534f, 0.501960814f, 0.501960814f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightCyan = {0.878431439f, 1.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightGoldenrodYellow = {0.980392218f, 0.980392218f, 0.823529482f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightGreen = {0.564705908f, 0.933333397f, 0.564705908f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightGray = {0.827451050f, 0.827451050f, 0.827451050f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightPink = {1.000000000f, 0.713725507f, 0.756862819f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightSalmon = {1.000000000f, 0.627451003f, 0.478431404f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightSeaGreen = {0.125490203f, 0.698039234f, 0.666666687f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightSkyBlue = {0.529411793f, 0.807843208f, 0.980392218f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightSlateGray = {0.466666698f, 0.533333361f, 0.600000024f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightSteelBlue = {0.690196097f, 0.768627524f, 0.870588303f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightYellow = {1.000000000f, 1.000000000f, 0.878431439f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Lime = {0.000000000f, 1.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LimeGreen = {0.196078449f, 0.803921640f, 0.196078449f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Linen = {0.980392218f, 0.941176534f, 0.901960850f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Magenta = {1.000000000f, 0.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Maroon = {0.501960814f, 0.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumAquamarine = {0.400000036f, 0.803921640f, 0.666666687f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumBlue = {0.000000000f, 0.000000000f, 0.803921640f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumOrchid = {0.729411781f, 0.333333343f, 0.827451050f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumPurple = {0.576470613f, 0.439215720f, 0.858823597f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumSeaGreen = {0.235294133f, 0.701960802f, 0.443137288f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumSlateBlue = {0.482352972f, 0.407843173f, 0.933333397f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumSpringGreen = {0.000000000f, 0.980392218f, 0.603921592f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumTurquoise = {0.282352954f, 0.819607913f, 0.800000072f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumVioletRed = {0.780392230f, 0.082352944f, 0.521568656f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MidnightBlue = {0.098039225f, 0.098039225f, 0.439215720f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MintCream = {0.960784376f, 1.000000000f, 0.980392218f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MistyRose = {1.000000000f, 0.894117713f, 0.882353008f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Moccasin = {1.000000000f, 0.894117713f, 0.709803939f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 NavajoWhite = {1.000000000f, 0.870588303f, 0.678431392f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Navy = {0.000000000f, 0.000000000f, 0.501960814f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 OldLace = {0.992156923f, 0.960784376f, 0.901960850f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Olive = {0.501960814f, 0.501960814f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 OliveDrab = {0.419607878f, 0.556862772f, 0.137254909f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Orange = {1.000000000f, 0.647058845f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 OrangeRed = {1.000000000f, 0.270588249f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Orchid = {0.854902029f, 0.439215720f, 0.839215755f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PaleGoldenrod = {0.933333397f, 0.909803987f, 0.666666687f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PaleGreen = {0.596078455f, 0.984313786f, 0.596078455f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PaleTurquoise = {0.686274529f, 0.933333397f, 0.933333397f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PaleVioletRed = {0.858823597f, 0.439215720f, 0.576470613f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PapayaWhip = {1.000000000f, 0.937254965f, 0.835294187f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PeachPuff = {1.000000000f, 0.854902029f, 0.725490212f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Peru = {0.803921640f, 0.521568656f, 0.247058839f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Pink = {1.000000000f, 0.752941251f, 0.796078503f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Plum = {0.866666734f, 0.627451003f, 0.866666734f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PowderBlue = {0.690196097f, 0.878431439f, 0.901960850f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Purple = {0.501960814f, 0.000000000f, 0.501960814f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Red = {1.000000000f, 0.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 RosyBrown = {0.737254918f, 0.560784340f, 0.560784340f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 RoyalBlue = {0.254901975f, 0.411764741f, 0.882353008f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SaddleBrown = {0.545098066f, 0.270588249f, 0.074509807f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Salmon = {0.980392218f, 0.501960814f, 0.447058856f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SandyBrown = {0.956862807f, 0.643137276f, 0.376470625f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SeaGreen = {0.180392161f, 0.545098066f, 0.341176480f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SeaShell = {1.000000000f, 0.960784376f, 0.933333397f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Sienna = {0.627451003f, 0.321568638f, 0.176470593f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Silver = {0.752941251f, 0.752941251f, 0.752941251f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SkyBlue = {0.529411793f, 0.807843208f, 0.921568692f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SlateBlue = {0.415686309f, 0.352941185f, 0.803921640f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SlateGray = {0.439215720f, 0.501960814f, 0.564705908f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Snow = {1.000000000f, 0.980392218f, 0.980392218f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SpringGreen = {0.000000000f, 1.000000000f, 0.498039246f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SteelBlue = {0.274509817f, 0.509803951f, 0.705882370f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Tan = {0.823529482f, 0.705882370f, 0.549019635f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Teal = {0.000000000f, 0.501960814f, 0.501960814f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Thistle = {0.847058892f, 0.749019623f, 0.847058892f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Tomato = {1.000000000f, 0.388235331f, 0.278431386f, 1.000000000f};
// Transparent is the only entry with alpha = 0 (fully clear).
XMGLOBALCONST XMVECTORF32 Transparent = {0.000000000f, 0.000000000f, 0.000000000f, 0.000000000f};
XMGLOBALCONST XMVECTORF32 Turquoise = {0.250980407f, 0.878431439f, 0.815686345f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Violet = {0.933333397f, 0.509803951f, 0.933333397f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Wheat = {0.960784376f, 0.870588303f, 0.701960802f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 White = {1.000000000f, 1.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 WhiteSmoke = {0.960784376f, 0.960784376f, 0.960784376f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Yellow = {1.000000000f, 1.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 YellowGreen = {0.603921592f, 0.803921640f, 0.196078449f, 1.000000000f};
}; // namespace Colors
}; // namespace DirectX
//-------------------------------------------------------------------------------------
// DirectXColors.h -- C++ Color Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#include "DirectXMath.h"
namespace DirectX
{
namespace Colors
{
// Standard colors (Red/Green/Blue/Alpha)
// Each constant is an XMGLOBALCONST XMVECTORF32, so it can be used directly
// wherever an XMVECTOR operand is expected. The component values are the
// classic .NET/web named colors: the 8-bit sRGB channel values divided by
// 255, stored as floats in [0, 1] (e.g. AliceBlue = 240/255, 248/255, 255/255).
// Alpha is 1.0 for every entry except Transparent, which is fully clear black.
XMGLOBALCONST XMVECTORF32 AliceBlue = {0.941176534f, 0.972549081f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 AntiqueWhite = {0.980392218f, 0.921568692f, 0.843137324f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Aqua = {0.000000000f, 1.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Aquamarine = {0.498039246f, 1.000000000f, 0.831372619f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Azure = {0.941176534f, 1.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Beige = {0.960784376f, 0.960784376f, 0.862745166f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Bisque = {1.000000000f, 0.894117713f, 0.768627524f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Black = {0.000000000f, 0.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 BlanchedAlmond = {1.000000000f, 0.921568692f, 0.803921640f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Blue = {0.000000000f, 0.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 BlueViolet = {0.541176498f, 0.168627456f, 0.886274576f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Brown = {0.647058845f, 0.164705887f, 0.164705887f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 BurlyWood = {0.870588303f, 0.721568644f, 0.529411793f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 CadetBlue = {0.372549027f, 0.619607866f, 0.627451003f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Chartreuse = {0.498039246f, 1.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Chocolate = {0.823529482f, 0.411764741f, 0.117647067f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Coral = {1.000000000f, 0.498039246f, 0.313725501f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 CornflowerBlue = {0.392156899f, 0.584313750f, 0.929411829f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Cornsilk = {1.000000000f, 0.972549081f, 0.862745166f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Crimson = {0.862745166f, 0.078431375f, 0.235294133f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Cyan = {0.000000000f, 1.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkBlue = {0.000000000f, 0.000000000f, 0.545098066f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkCyan = {0.000000000f, 0.545098066f, 0.545098066f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkGoldenrod = {0.721568644f, 0.525490224f, 0.043137256f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkGray = {0.662745118f, 0.662745118f, 0.662745118f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkGreen = {0.000000000f, 0.392156899f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkKhaki = {0.741176486f, 0.717647076f, 0.419607878f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkMagenta = {0.545098066f, 0.000000000f, 0.545098066f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkOliveGreen = {0.333333343f, 0.419607878f, 0.184313729f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkOrange = {1.000000000f, 0.549019635f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkOrchid = {0.600000024f, 0.196078449f, 0.800000072f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkRed = {0.545098066f, 0.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkSalmon = {0.913725555f, 0.588235319f, 0.478431404f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkSeaGreen = {0.560784340f, 0.737254918f, 0.545098066f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkSlateBlue = {0.282352954f, 0.239215702f, 0.545098066f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkSlateGray = {0.184313729f, 0.309803933f, 0.309803933f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkTurquoise = {0.000000000f, 0.807843208f, 0.819607913f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DarkViolet = {0.580392182f, 0.000000000f, 0.827451050f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DeepPink = {1.000000000f, 0.078431375f, 0.576470613f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DeepSkyBlue = {0.000000000f, 0.749019623f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DimGray = {0.411764741f, 0.411764741f, 0.411764741f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 DodgerBlue = {0.117647067f, 0.564705908f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Firebrick = {0.698039234f, 0.133333340f, 0.133333340f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 FloralWhite = {1.000000000f, 0.980392218f, 0.941176534f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 ForestGreen = {0.133333340f, 0.545098066f, 0.133333340f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Fuchsia = {1.000000000f, 0.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Gainsboro = {0.862745166f, 0.862745166f, 0.862745166f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 GhostWhite = {0.972549081f, 0.972549081f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Gold = {1.000000000f, 0.843137324f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Goldenrod = {0.854902029f, 0.647058845f, 0.125490203f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Gray = {0.501960814f, 0.501960814f, 0.501960814f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Green = {0.000000000f, 0.501960814f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 GreenYellow = {0.678431392f, 1.000000000f, 0.184313729f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Honeydew = {0.941176534f, 1.000000000f, 0.941176534f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 HotPink = {1.000000000f, 0.411764741f, 0.705882370f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 IndianRed = {0.803921640f, 0.360784322f, 0.360784322f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Indigo = {0.294117659f, 0.000000000f, 0.509803951f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Ivory = {1.000000000f, 1.000000000f, 0.941176534f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Khaki = {0.941176534f, 0.901960850f, 0.549019635f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Lavender = {0.901960850f, 0.901960850f, 0.980392218f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LavenderBlush = {1.000000000f, 0.941176534f, 0.960784376f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LawnGreen = {0.486274540f, 0.988235354f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LemonChiffon = {1.000000000f, 0.980392218f, 0.803921640f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightBlue = {0.678431392f, 0.847058892f, 0.901960850f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightCoral = {0.941176534f, 0.501960814f, 0.501960814f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightCyan = {0.878431439f, 1.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightGoldenrodYellow = {0.980392218f, 0.980392218f, 0.823529482f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightGreen = {0.564705908f, 0.933333397f, 0.564705908f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightGray = {0.827451050f, 0.827451050f, 0.827451050f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightPink = {1.000000000f, 0.713725507f, 0.756862819f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightSalmon = {1.000000000f, 0.627451003f, 0.478431404f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightSeaGreen = {0.125490203f, 0.698039234f, 0.666666687f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightSkyBlue = {0.529411793f, 0.807843208f, 0.980392218f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightSlateGray = {0.466666698f, 0.533333361f, 0.600000024f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightSteelBlue = {0.690196097f, 0.768627524f, 0.870588303f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LightYellow = {1.000000000f, 1.000000000f, 0.878431439f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Lime = {0.000000000f, 1.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 LimeGreen = {0.196078449f, 0.803921640f, 0.196078449f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Linen = {0.980392218f, 0.941176534f, 0.901960850f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Magenta = {1.000000000f, 0.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Maroon = {0.501960814f, 0.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumAquamarine = {0.400000036f, 0.803921640f, 0.666666687f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumBlue = {0.000000000f, 0.000000000f, 0.803921640f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumOrchid = {0.729411781f, 0.333333343f, 0.827451050f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumPurple = {0.576470613f, 0.439215720f, 0.858823597f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumSeaGreen = {0.235294133f, 0.701960802f, 0.443137288f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumSlateBlue = {0.482352972f, 0.407843173f, 0.933333397f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumSpringGreen = {0.000000000f, 0.980392218f, 0.603921592f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumTurquoise = {0.282352954f, 0.819607913f, 0.800000072f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MediumVioletRed = {0.780392230f, 0.082352944f, 0.521568656f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MidnightBlue = {0.098039225f, 0.098039225f, 0.439215720f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MintCream = {0.960784376f, 1.000000000f, 0.980392218f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 MistyRose = {1.000000000f, 0.894117713f, 0.882353008f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Moccasin = {1.000000000f, 0.894117713f, 0.709803939f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 NavajoWhite = {1.000000000f, 0.870588303f, 0.678431392f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Navy = {0.000000000f, 0.000000000f, 0.501960814f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 OldLace = {0.992156923f, 0.960784376f, 0.901960850f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Olive = {0.501960814f, 0.501960814f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 OliveDrab = {0.419607878f, 0.556862772f, 0.137254909f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Orange = {1.000000000f, 0.647058845f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 OrangeRed = {1.000000000f, 0.270588249f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Orchid = {0.854902029f, 0.439215720f, 0.839215755f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PaleGoldenrod = {0.933333397f, 0.909803987f, 0.666666687f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PaleGreen = {0.596078455f, 0.984313786f, 0.596078455f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PaleTurquoise = {0.686274529f, 0.933333397f, 0.933333397f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PaleVioletRed = {0.858823597f, 0.439215720f, 0.576470613f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PapayaWhip = {1.000000000f, 0.937254965f, 0.835294187f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PeachPuff = {1.000000000f, 0.854902029f, 0.725490212f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Peru = {0.803921640f, 0.521568656f, 0.247058839f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Pink = {1.000000000f, 0.752941251f, 0.796078503f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Plum = {0.866666734f, 0.627451003f, 0.866666734f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 PowderBlue = {0.690196097f, 0.878431439f, 0.901960850f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Purple = {0.501960814f, 0.000000000f, 0.501960814f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Red = {1.000000000f, 0.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 RosyBrown = {0.737254918f, 0.560784340f, 0.560784340f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 RoyalBlue = {0.254901975f, 0.411764741f, 0.882353008f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SaddleBrown = {0.545098066f, 0.270588249f, 0.074509807f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Salmon = {0.980392218f, 0.501960814f, 0.447058856f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SandyBrown = {0.956862807f, 0.643137276f, 0.376470625f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SeaGreen = {0.180392161f, 0.545098066f, 0.341176480f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SeaShell = {1.000000000f, 0.960784376f, 0.933333397f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Sienna = {0.627451003f, 0.321568638f, 0.176470593f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Silver = {0.752941251f, 0.752941251f, 0.752941251f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SkyBlue = {0.529411793f, 0.807843208f, 0.921568692f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SlateBlue = {0.415686309f, 0.352941185f, 0.803921640f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SlateGray = {0.439215720f, 0.501960814f, 0.564705908f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Snow = {1.000000000f, 0.980392218f, 0.980392218f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SpringGreen = {0.000000000f, 1.000000000f, 0.498039246f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 SteelBlue = {0.274509817f, 0.509803951f, 0.705882370f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Tan = {0.823529482f, 0.705882370f, 0.549019635f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Teal = {0.000000000f, 0.501960814f, 0.501960814f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Thistle = {0.847058892f, 0.749019623f, 0.847058892f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Tomato = {1.000000000f, 0.388235331f, 0.278431386f, 1.000000000f};
// Transparent is the only entry with alpha = 0 (fully clear).
XMGLOBALCONST XMVECTORF32 Transparent = {0.000000000f, 0.000000000f, 0.000000000f, 0.000000000f};
XMGLOBALCONST XMVECTORF32 Turquoise = {0.250980407f, 0.878431439f, 0.815686345f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Violet = {0.933333397f, 0.509803951f, 0.933333397f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Wheat = {0.960784376f, 0.870588303f, 0.701960802f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 White = {1.000000000f, 1.000000000f, 1.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 WhiteSmoke = {0.960784376f, 0.960784376f, 0.960784376f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 Yellow = {1.000000000f, 1.000000000f, 0.000000000f, 1.000000000f};
XMGLOBALCONST XMVECTORF32 YellowGreen = {0.603921592f, 0.803921640f, 0.196078449f, 1.000000000f};
}; // namespace Colors
}; // namespace DirectX

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

42
MIT.txt
View File

@ -1,21 +1,21 @@
The MIT License (MIT)
Copyright (c) 2016 Microsoft Corp
Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use, copy, modify,
merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be included in all copies
or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
The MIT License (MIT)
Copyright (c) 2016 Microsoft Corp
Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use, copy, modify,
merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be included in all copies
or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -1,131 +1,131 @@
-----------
DirectXMath
-----------
Copyright (c) Microsoft Corporation. All rights reserved.
June 2016
This package contains the DirectXMath library, an all inline SIMD C++ linear algebra library
-----------
DirectXMath
-----------
Copyright (c) Microsoft Corporation. All rights reserved.
June 2016
This package contains the DirectXMath library, an all inline SIMD C++ linear algebra library
for use in games and graphics apps
This code is designed to build with Visual Studio 2013 or 2015. It is recommended that you
make use of VS 2013 Update 5 or VS 2015 Update 2.
These components are designed to work without requiring any content from the DirectX SDK. For details,
see "Where is the DirectX SDK?" <http://msdn.microsoft.com/en-us/library/ee663275.aspx>.
Inc\
DirectXMath Files (in the DirectX C++ namespace)
DirectXMath.h - Core library
DirectXPackedVector.h - Load/Store functions and types for working with various compressed GPU formats
DirectXColors.h - .NET-style Color defines in sRGB color space
DirectXCollision.h - Bounding volume collision library
Extentions\
Advanced instruction set variants for guarded codepaths
DirectXMathSSE3.h - SSE3
DirectXMathBE.h - Supplemental SSE3 (SSSE3)
DirectXMathSSE4.h - SSE4.1
DirectXMathAVX.h - Advanced Vector Extensions (AVX)
DirectXMathAVX2.h - Advanced Vector Extensions 2 (AVX2)
DirectXMathF16C.h - Half-precision conversions (F16C)
DirectXMathFMA3.h - Fused multiply-accumulate (FMA3)
DirectXMathFMA4.h - Fused multiply-accumulate (FMA4)
SHMath\
Spherical Harmonics math functions
DirectXSH.h - Header for SHMath functions
DirectXSH.cpp, DirectXSHD3D11.cpp - Implementation
XDSP\
XDSP.h - Digital Signal Processing helper functions
All content and source code for this package are subject to the terms of the MIT License.
<http://opensource.org/licenses/MIT>.
Documentation is available at <https://msdn.microsoft.com/en-us/library/windows/desktop/hh437833.aspx>.
For the latest version of DirectXMath, bug reports, etc. please visit the project site.
<https://github.com/Microsoft/DirectXMath>
This project has adopted the Microsoft Open Source Code of Conduct. For more information see the
Code of Conduct FAQ or contact opencode@microsoft.com with any additional questions or comments.
https://opensource.microsoft.com/codeofconduct/
---------------
RELEASE HISTORY
---------------
June 2016 (3.09)
Includes support for additional optimizations when built with /arch:AVX or /arch:AVX2
Added use of constexpr for type constructors, XMConvertToRadians, and XMConvertToDegrees
Marked __vector4i, XMXDEC4, XMDECN4, XMDEC4, and associated Load & Store functions as deprecated.
These are vestiges of Xbox 360 support and will be removed in a future release
Renamed parameter in XMMatrixPerspectiveFov* to reduce user confusion when relying on IntelliSense
XMU565, XMUNIBBLE4 constructors take uint8_t instead of int8_t
May 2016
DirectXMath 3.08 released under the MIT license
November 2015 (3.08)
Added use of _mm_sfence for Stream methods
Fixed bug with non-uniform scaling transforms for BoundingOrientedBox
Added asserts for Near/FarZ in XMMatrix* methods
Added use of =default for PODs with VS 2013/2015
Additional SSE and ARM-NEON optimizations for PackedVector functions
April 2015 (3.07)
Fix customer reported bugs in BoundingBox methods
Fix customer reported bug in XMStoreFloat3SE
Fix customer reported bug in XMVectorATan2, XMVectorATan2Est
Fix customer reported bug in XMVectorRound
October 2013 (3.06)
Fixed load/store of XMFLOAT3SE to properly match the DXGI_FORMAT_R9G9B9E5_SHAREDEXP
Added XMLoadUDecN4_XR and XMStoreUDecN4_XR to match DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM
Added XMColorRGBToSRGB and XMColorSRGBToRGB to convert linear RGB <-> sRGB
July 2013 (3.05)
Use x86/x64 __vectorcall calling-convention when available (XM_CALLCONV, HXMVECTOR, FXMMATRIX introduced)
Fixed bug with XMVectorFloor and XMVectorCeiling when given whole odd numbers (i.e. 105.0)
Improved XMVectorRound algorithm
ARM-NEON optimizations for XMVectorExp2, XMVectorLog2, XMVectorExpE, and XMVectorLogE
ARM-NEON code paths use multiply-by-scalar intrinsics when supported
Additional optimizations for ARM-NEON Stream functions
Fixed potential warning C4723 using operator/ or operator/=
March 2013 (3.04)
XMVectorExp2, XMVectorLog2, XMVectorExpE, and XMVectorLogE functions added to provide base-e support in addition to the existing base-2 support
XMVectorExp and XMVectorLog are now aliases for XMVectorExp2 and XMVectorLog2
Additional optimizations for Stream functions
XMVector3Cross now ensures w component is zero on ARM
XMConvertHalfToFloat and XMConvertFloatToHalf now use IEEE 754 standard float16 behavior for INF/QNAN
Updated matrix version Transform for BoundingOrientedBox and BoundingFrustum to handle scaling
March 2012 (3.03)
Breaking change: Removed union members from XMMATRIX type to make it a fully 'opaque' type
Marked single-parameter C++ constructors for XMFLOAT2, XMFLOAT2A, XMFLOAT3, XMFLOAT3A, XMFLOAT4, and XMFLOAT4A explicit
February 2012 (3.02)
ARM-NEON intrinsics (selected by default for the ARM platform)
reworked XMVectorPermute, change of XM_PERMUTE_ defines, removal of XMVectorPermuteControl
Addition of XM_SWIZZLE_ defines
Optimizations for transcendental functions
Template forms for permute, swizzle, shift-left, rotate-left, rotation-right, and insert
Removal of deprecated types and functions
(XM_CACHE_LINE_SIZE define, XMVectorExpEst, XMVectorLogEst, XMVectorPowEst, XMVectorSinHEs, XMVectorCosHEst, XMVectorTanHEst,
XMVector2InBoundsR, XMVector3InBoundsR, XMVector4InBoundsR)
Removed XM_STRICT_VECTOR4; XMVECTOR in NO-INTRINSICS always defined without .x, .y, .z, .w, .v, or .u
Additional bounding types
SAL fixes and improvements
September 2011 (3.00)
Renamed and reorganized the headers
Introduced C++ namespaces
Removed the Xbox 360-specific GPU types
(HENDN3, XMHEND3, XMUHENDN3, XMUHEND3, XMDHENN3, XMDHEN3,
XMUDHENN3, XMUDHEN3, XMXICON4, XMXICO4, XMICON4, XMICO4, XMUICON4, XMUICO4 )
This code is designed to build with Visual Studio 2013 or 2015. It is recommended that you
make use of VS 2013 Update 5 or VS 2015 Update 2.
These components are designed to work without requiring any content from the DirectX SDK. For details,
see "Where is the DirectX SDK?" <http://msdn.microsoft.com/en-us/library/ee663275.aspx>.
Inc\
DirectXMath Files (in the DirectX C++ namespace)
DirectXMath.h - Core library
DirectXPackedVector.h - Load/Store functions and types for working with various compressed GPU formats
DirectXColors.h - .NET-style Color defines in sRGB color space
DirectXCollision.h - Bounding volume collision library
Extentions\
Advanced instruction set variants for guarded codepaths
DirectXMathSSE3.h - SSE3
DirectXMathBE.h - Supplemental SSE3 (SSSE3)
DirectXMathSSE4.h - SSE4.1
DirectXMathAVX.h - Advanced Vector Extensions (AVX)
DirectXMathAVX2.h - Advanced Vector Extensions 2 (AVX2)
DirectXMathF16C.h - Half-precision conversions (F16C)
DirectXMathFMA3.h - Fused multiply-accumulate (FMA3)
DirectXMathFMA4.h - Fused multiply-accumulate (FMA4)
SHMath\
Spherical Harmonics math functions
DirectXSH.h - Header for SHMath functions
DirectXSH.cpp, DirectXSHD3D11.cpp - Implementation
XDSP\
XDSP.h - Digital Signal Processing helper functions
All content and source code for this package are subject to the terms of the MIT License.
<http://opensource.org/licenses/MIT>.
Documentation is available at <https://msdn.microsoft.com/en-us/library/windows/desktop/hh437833.aspx>.
For the latest version of DirectXMath, bug reports, etc. please visit the project site.
<https://github.com/Microsoft/DirectXMath>
This project has adopted the Microsoft Open Source Code of Conduct. For more information see the
Code of Conduct FAQ or contact opencode@microsoft.com with any additional questions or comments.
https://opensource.microsoft.com/codeofconduct/
---------------
RELEASE HISTORY
---------------
June 2016 (3.09)
Includes support for additional optimizations when built with /arch:AVX or /arch:AVX2
Added use of constexpr for type constructors, XMConvertToRadians, and XMConvertToDegrees
Marked __vector4i, XMXDEC4, XMDECN4, XMDEC4, and associated Load & Store functions as deprecated.
These are vestiges of Xbox 360 support and will be removed in a future release
Renamed parameter in XMMatrixPerspectiveFov* to reduce user confusion when relying on IntelliSense
XMU565, XMUNIBBLE4 constructors take uint8_t instead of int8_t
May 2016
DirectXMath 3.08 released under the MIT license
November 2015 (3.08)
Added use of _mm_sfence for Stream methods
Fixed bug with non-uniform scaling transforms for BoundingOrientedBox
Added asserts for Near/FarZ in XMMatrix* methods
Added use of =default for PODs with VS 2013/2015
Additional SSE and ARM-NEON optimizations for PackedVector functions
April 2015 (3.07)
Fix customer reported bugs in BoundingBox methods
Fix customer reported bug in XMStoreFloat3SE
Fix customer reported bug in XMVectorATan2, XMVectorATan2Est
Fix customer reported bug in XMVectorRound
October 2013 (3.06)
Fixed load/store of XMFLOAT3SE to properly match the DXGI_FORMAT_R9G9B9E5_SHAREDEXP
Added XMLoadUDecN4_XR and XMStoreUDecN4_XR to match DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM
Added XMColorRGBToSRGB and XMColorSRGBToRGB to convert linear RGB <-> sRGB
July 2013 (3.05)
Use x86/x64 __vectorcall calling-convention when available (XM_CALLCONV, HXMVECTOR, FXMMATRIX introduced)
Fixed bug with XMVectorFloor and XMVectorCeiling when given whole odd numbers (i.e. 105.0)
Improved XMVectorRound algorithm
ARM-NEON optimizations for XMVectorExp2, XMVectorLog2, XMVectorExpE, and XMVectorLogE
ARM-NEON code paths use multiply-by-scalar intrinsics when supported
Additional optimizations for ARM-NEON Stream functions
Fixed potential warning C4723 using operator/ or operator/=
March 2013 (3.04)
XMVectorExp2, XMVectorLog2, XMVectorExpE, and XMVectorLogE functions added to provide base-e support in addition to the existing base-2 support
XMVectorExp and XMVectorLog are now aliases for XMVectorExp2 and XMVectorLog2
Additional optimizations for Stream functions
XMVector3Cross now ensures w component is zero on ARM
XMConvertHalfToFloat and XMConvertFloatToHalf now use IEEE 754 standard float16 behavior for INF/QNAN
Updated matrix version Transform for BoundingOrientedBox and BoundingFrustum to handle scaling
March 2012 (3.03)
Breaking change: Removed union members from XMMATRIX type to make it a fully 'opaque' type
Marked single-parameter C++ constructors for XMFLOAT2, XMFLOAT2A, XMFLOAT3, XMFLOAT3A, XMFLOAT4, and XMFLOAT4A explicit
February 2012 (3.02)
ARM-NEON intrinsics (selected by default for the ARM platform)
reworked XMVectorPermute, change of XM_PERMUTE_ defines, removal of XMVectorPermuteControl
Addition of XM_SWIZZLE_ defines
Optimizations for transcendental functions
Template forms for permute, swizzle, shift-left, rotate-left, rotation-right, and insert
Removal of deprecated types and functions
(XM_CACHE_LINE_SIZE define, XMVectorExpEst, XMVectorLogEst, XMVectorPowEst, XMVectorSinHEs, XMVectorCosHEst, XMVectorTanHEst,
XMVector2InBoundsR, XMVector3InBoundsR, XMVector4InBoundsR)
Removed XM_STRICT_VECTOR4; XMVECTOR in NO-INTRINSICS always defined without .x, .y, .z, .w, .v, or .u
Additional bounding types
SAL fixes and improvements
September 2011 (3.00)
Renamed and reorganized the headers
Introduced C++ namespaces
Removed the Xbox 360-specific GPU types
(HENDN3, XMHEND3, XMUHENDN3, XMUHEND3, XMDHENN3, XMDHEN3,
XMUDHENN3, XMUDHEN3, XMXICON4, XMXICO4, XMICON4, XMICO4, XMUICON4, XMUICO4 )

File diff suppressed because it is too large Load Diff

View File

@ -1,77 +1,77 @@
//-------------------------------------------------------------------------------------
// DirectXSH.h -- C++ Spherical Harmonics Math Library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/p/?LinkId=262885
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#define DIRECTX_SHMATH_VERSION 102
#include <DirectXMath.h>
#include <winerror.h>
struct ID3D11DeviceContext;
struct ID3D11Texture2D;
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
const size_t XM_SH_MINORDER = 2;
const size_t XM_SH_MAXORDER = 6;
float* XM_CALLCONV XMSHEvalDirection( _Out_writes_(order*order) float *result, _In_ size_t order, _In_ FXMVECTOR dir );
float* XM_CALLCONV XMSHRotate( _Out_writes_(order*order) float *result, _In_ size_t order, _In_ FXMMATRIX rotMatrix, _In_reads_(order*order) const float *input );
float* XMSHRotateZ( _Out_writes_(order*order) float *result, _In_ size_t order, _In_ float angle, _In_reads_(order*order) const float *input );
float* XMSHAdd( _Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *inputA, _In_reads_(order*order) const float *inputB );
float* XMSHScale( _Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *input, _In_ float scale );
float XMSHDot( _In_ size_t order, _In_reads_(order*order) const float *inputA, _In_reads_(order*order) const float *inputB );
float* XMSHMultiply( _Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *inputF, _In_reads_(order*order) const float *inputG );
float* XMSHMultiply2( _Out_writes_(4) float *result, _In_reads_(4) const float *inputF, _In_reads_(4) const float *inputG );
float* XMSHMultiply3( _Out_writes_(9) float *result, _In_reads_(9) const float *inputF, _In_reads_(9) const float *inputG );
float* XMSHMultiply4( _Out_writes_(16) float *result, _In_reads_(16) const float *inputF, _In_reads_(16) const float *inputG );
float* XMSHMultiply5( _Out_writes_(25) float *result, _In_reads_(25) const float *inputF, _In_reads_(25) const float *inputG );
float* XMSHMultiply6( _Out_writes_(36) float *result, _In_reads_(36) const float *inputF, _In_reads_(36) const float *inputG );
bool XM_CALLCONV XMSHEvalDirectionalLight( _In_ size_t order, _In_ FXMVECTOR dir, _In_ FXMVECTOR color,
_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB );
bool XM_CALLCONV XMSHEvalSphericalLight( _In_ size_t order, _In_ FXMVECTOR pos, _In_ float radius, _In_ FXMVECTOR color,
_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB );
bool XM_CALLCONV XMSHEvalConeLight( _In_ size_t order, _In_ FXMVECTOR dir, _In_ float radius, _In_ FXMVECTOR color,
_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB );
bool XM_CALLCONV XMSHEvalHemisphereLight( _In_ size_t order, _In_ FXMVECTOR dir, _In_ FXMVECTOR topColor, _In_ FXMVECTOR bottomColor,
_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB );
HRESULT SHProjectCubeMap( _In_ ID3D11DeviceContext *context, _In_ size_t order, _In_ ID3D11Texture2D *cubeMap,
_Out_writes_opt_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB );
}; // namespace DirectX
//-------------------------------------------------------------------------------------
// DirectXSH.h -- C++ Spherical Harmonics Math Library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/p/?LinkId=262885
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#define DIRECTX_SHMATH_VERSION 102
#include <DirectXMath.h>
#include <winerror.h>
struct ID3D11DeviceContext;
struct ID3D11Texture2D;
namespace DirectX
{
#if (DIRECTXMATH_VERSION < 305) && !defined(XM_CALLCONV)
#define XM_CALLCONV __fastcall
typedef const DirectX::XMVECTOR& HXMVECTOR;
typedef const DirectX::XMMATRIX& FXMMATRIX;
#endif
const size_t XM_SH_MINORDER = 2;
const size_t XM_SH_MAXORDER = 6;
float* XM_CALLCONV XMSHEvalDirection( _Out_writes_(order*order) float *result, _In_ size_t order, _In_ FXMVECTOR dir );
float* XM_CALLCONV XMSHRotate( _Out_writes_(order*order) float *result, _In_ size_t order, _In_ FXMMATRIX rotMatrix, _In_reads_(order*order) const float *input );
float* XMSHRotateZ( _Out_writes_(order*order) float *result, _In_ size_t order, _In_ float angle, _In_reads_(order*order) const float *input );
float* XMSHAdd( _Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *inputA, _In_reads_(order*order) const float *inputB );
float* XMSHScale( _Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *input, _In_ float scale );
float XMSHDot( _In_ size_t order, _In_reads_(order*order) const float *inputA, _In_reads_(order*order) const float *inputB );
float* XMSHMultiply( _Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *inputF, _In_reads_(order*order) const float *inputG );
float* XMSHMultiply2( _Out_writes_(4) float *result, _In_reads_(4) const float *inputF, _In_reads_(4) const float *inputG );
float* XMSHMultiply3( _Out_writes_(9) float *result, _In_reads_(9) const float *inputF, _In_reads_(9) const float *inputG );
float* XMSHMultiply4( _Out_writes_(16) float *result, _In_reads_(16) const float *inputF, _In_reads_(16) const float *inputG );
float* XMSHMultiply5( _Out_writes_(25) float *result, _In_reads_(25) const float *inputF, _In_reads_(25) const float *inputG );
float* XMSHMultiply6( _Out_writes_(36) float *result, _In_reads_(36) const float *inputF, _In_reads_(36) const float *inputG );
bool XM_CALLCONV XMSHEvalDirectionalLight( _In_ size_t order, _In_ FXMVECTOR dir, _In_ FXMVECTOR color,
_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB );
bool XM_CALLCONV XMSHEvalSphericalLight( _In_ size_t order, _In_ FXMVECTOR pos, _In_ float radius, _In_ FXMVECTOR color,
_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB );
bool XM_CALLCONV XMSHEvalConeLight( _In_ size_t order, _In_ FXMVECTOR dir, _In_ float radius, _In_ FXMVECTOR color,
_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB );
bool XM_CALLCONV XMSHEvalHemisphereLight( _In_ size_t order, _In_ FXMVECTOR dir, _In_ FXMVECTOR topColor, _In_ FXMVECTOR bottomColor,
_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB );
HRESULT SHProjectCubeMap( _In_ ID3D11DeviceContext *context, _In_ size_t order, _In_ ID3D11Texture2D *cubeMap,
_Out_writes_opt_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB );
}; // namespace DirectX

View File

@ -1,390 +1,390 @@
//-------------------------------------------------------------------------------------
// DirectXSHD3D11.cpp -- C++ Spherical Harmonics Math Library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/p/?LinkId=262885
//-------------------------------------------------------------------------------------
#include "DirectXSH.h"
#include <d3d11.h>
#include <DirectXPackedVector.h>
#include <assert.h>
#include <memory>
#include <malloc.h>
namespace
{
struct aligned_deleter { void operator()(void* p) { _aligned_free(p); } };
typedef std::unique_ptr<DirectX::XMVECTOR, aligned_deleter> ScopedAlignedArrayXMVECTOR;
template<class T> class ScopedObject
{
public:
explicit ScopedObject( T *p = 0 ) : _pointer(p) {}
~ScopedObject()
{
if ( _pointer )
{
_pointer->Release();
_pointer = nullptr;
}
}
bool IsNull() const { return (!_pointer); }
T& operator*() { return *_pointer; }
T* operator->() { return _pointer; }
T** operator&() { return &_pointer; }
void Reset(T *p = 0) { if ( _pointer ) { _pointer->Release(); } _pointer = p; }
T* Get() const { return _pointer; }
private:
ScopedObject(const ScopedObject&);
ScopedObject& operator=(const ScopedObject&);
T* _pointer;
};
//-------------------------------------------------------------------------------------
// This code is lifted from DirectXTex http://directxtex.codeplex.com/
// If you need additional DXGI format support, see DirectXTexConvert.cpp
//-------------------------------------------------------------------------------------
#define LOAD_SCANLINE( type, func )\
if ( size >= sizeof(type) )\
{\
const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
{\
if ( dPtr >= ePtr ) break;\
*(dPtr++) = func( sPtr++ );\
}\
return true;\
}\
return false;
#define LOAD_SCANLINE3( type, func, defvec )\
if ( size >= sizeof(type) )\
{\
const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
{\
XMVECTOR v = func( sPtr++ );\
if ( dPtr >= ePtr ) break;\
*(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\
}\
return true;\
}\
return false;
#define LOAD_SCANLINE2( type, func, defvec )\
if ( size >= sizeof(type) )\
{\
const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
{\
XMVECTOR v = func( sPtr++ );\
if ( dPtr >= ePtr ) break;\
*(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\
}\
return true;\
}\
return false;
#pragma warning(push)
#pragma warning(disable : 6101)
_Success_(return)
static bool _LoadScanline( _Out_writes_(count) DirectX::XMVECTOR* pDestination, _In_ size_t count,
_In_reads_bytes_(size) LPCVOID pSource, _In_ size_t size, _In_ DXGI_FORMAT format )
{
assert( pDestination && count > 0 && (((uintptr_t)pDestination & 0xF) == 0) );
assert( pSource && size > 0 );
using namespace DirectX;
using namespace DirectX::PackedVector;
XMVECTOR* __restrict dPtr = pDestination;
if ( !dPtr )
return false;
const XMVECTOR* ePtr = pDestination + count;
switch( format )
{
case DXGI_FORMAT_R32G32B32A32_FLOAT:
{
size_t msize = (size > (sizeof(XMVECTOR)*count)) ? (sizeof(XMVECTOR)*count) : size;
memcpy_s( dPtr, sizeof(XMVECTOR)*count, pSource, msize );
}
return true;
case DXGI_FORMAT_R32G32B32_FLOAT:
LOAD_SCANLINE3( XMFLOAT3, XMLoadFloat3, g_XMIdentityR3 )
case DXGI_FORMAT_R16G16B16A16_FLOAT:
LOAD_SCANLINE( XMHALF4, XMLoadHalf4 )
case DXGI_FORMAT_R32G32_FLOAT:
LOAD_SCANLINE2( XMFLOAT2, XMLoadFloat2, g_XMIdentityR3 )
case DXGI_FORMAT_R11G11B10_FLOAT:
LOAD_SCANLINE3( XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3 );
case DXGI_FORMAT_R16G16_FLOAT:
LOAD_SCANLINE2( XMHALF2, XMLoadHalf2, g_XMIdentityR3 )
case DXGI_FORMAT_R32_FLOAT:
if ( size >= sizeof(float) )
{
const float* __restrict sPtr = reinterpret_cast<const float*>(pSource);
for( size_t icount = 0; icount < size; icount += sizeof(float) )
{
XMVECTOR v = XMLoadFloat( sPtr++ );
if ( dPtr >= ePtr ) break;
*(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1000 );
}
return true;
}
return false;
case DXGI_FORMAT_R16_FLOAT:
if ( size >= sizeof(HALF) )
{
const HALF * __restrict sPtr = reinterpret_cast<const HALF*>(pSource);
for( size_t icount = 0; icount < size; icount += sizeof(HALF) )
{
if ( dPtr >= ePtr ) break;
*(dPtr++) = XMVectorSet( XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f );
}
return true;
}
return false;
default:
return false;
}
}
#pragma warning(pop)
}; // namespace anonymous
namespace DirectX
{
//-------------------------------------------------------------------------------------
// Projects a function represented in a cube map into spherical harmonics.
//
// http://msdn.microsoft.com/en-us/library/windows/desktop/ff476300.aspx
//-------------------------------------------------------------------------------------
HRESULT SHProjectCubeMap( _In_ ID3D11DeviceContext *context,
_In_ size_t order,
_In_ ID3D11Texture2D *cubeMap,
_Out_writes_opt_(order*order) float *resultR,
_Out_writes_opt_(order*order) float *resultG,
_Out_writes_opt_(order*order) float* resultB )
{
if ( !context || !cubeMap )
return E_INVALIDARG;
if ( order < XM_SH_MINORDER || order > XM_SH_MAXORDER )
return E_INVALIDARG;
D3D11_TEXTURE2D_DESC desc;
cubeMap->GetDesc( &desc );
if ( (desc.ArraySize != 6)
|| (desc.Width != desc.Height)
|| (desc.SampleDesc.Count > 1) )
return E_FAIL;
switch( desc.Format )
{
case DXGI_FORMAT_R32G32B32A32_FLOAT:
case DXGI_FORMAT_R32G32B32_FLOAT:
case DXGI_FORMAT_R16G16B16A16_FLOAT:
case DXGI_FORMAT_R32G32_FLOAT:
case DXGI_FORMAT_R11G11B10_FLOAT:
case DXGI_FORMAT_R16G16_FLOAT:
case DXGI_FORMAT_R32_FLOAT:
case DXGI_FORMAT_R16_FLOAT:
// See _LoadScanline to support more pixel formats
break;
default:
return E_FAIL;
}
//--- Create a staging resource copy (if needed) to be able to read data
ID3D11Texture2D* texture = nullptr;
ScopedObject<ID3D11Texture2D> staging;
if ( !(desc.CPUAccessFlags & D3D11_CPU_ACCESS_READ) )
{
D3D11_TEXTURE2D_DESC sdesc = desc;
sdesc.BindFlags = 0;
sdesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
sdesc.Usage = D3D11_USAGE_STAGING;
ScopedObject<ID3D11Device> device;
context->GetDevice( &device );
assert( !device.IsNull() );
HRESULT hr = device->CreateTexture2D( &sdesc, nullptr, &staging );
if ( FAILED(hr) )
return hr;
context->CopyResource( staging.Get(), cubeMap );
texture = staging.Get();
}
else
texture = cubeMap;
assert( texture != 0 );
//--- Setup for SH projection
ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( sizeof(XMVECTOR)*desc.Width, 16 ) ) );
if ( !scanline )
return E_OUTOFMEMORY;
assert( desc.Width > 0 );
float fSize = static_cast<float>( desc.Width );
float fPicSize = 1.0f / fSize;
// index from [0,W-1], f(0) maps to -1 + 1/W, f(W-1) maps to 1 - 1/w
// linear function x*S +B, 1st constraint means B is (-1+1/W), plug into
// second and solve for S: S = 2*(1-1/W)/(W-1). The old code that did
// this was incorrect - but only for computing the differential solid
// angle, where the final value was 1.0 instead of 1-1/w...
float fB = -1.0f + 1.0f/fSize;
float fS = ( desc.Width > 1 ) ? (2.0f*(1.0f-1.0f/fSize)/(fSize-1.0f)) : 0.f;
// clear out accumulation variables
float fWt = 0.0f;
if ( resultR )
memset( resultR, 0, sizeof(float)*order*order );
if ( resultG )
memset( resultG, 0, sizeof(float)*order*order );
if ( resultB )
memset( resultB, 0, sizeof(float)*order*order );
float shBuff[XM_SH_MAXORDER*XM_SH_MAXORDER];
float shBuffB[XM_SH_MAXORDER*XM_SH_MAXORDER];
//--- Process each face of the cubemap
for (UINT face=0; face < 6; ++face )
{
UINT dindex = D3D11CalcSubresource( 0, face, desc.MipLevels );
D3D11_MAPPED_SUBRESOURCE mapped;
HRESULT hr = context->Map( texture, dindex, D3D11_MAP_READ, 0, &mapped );
if ( FAILED(hr) )
return hr;
const uint8_t *pSrc = reinterpret_cast<const uint8_t*>(mapped.pData);
for( UINT y=0; y < desc.Height; ++y )
{
XMVECTOR* ptr = scanline.get();
if ( !_LoadScanline( ptr, desc.Width, pSrc, mapped.RowPitch, desc.Format ) )
{
context->Unmap( texture, dindex );
return E_FAIL;
}
const float fV = y*fS + fB;
XMVECTOR* pixel = ptr;
for( UINT x=0; x < desc.Width; ++x, ++pixel )
{
const float fU = x*fS + fB;
float ix, iy, iz;
switch( face )
{
case 0: // Positive X
iz = 1.0f - (2.0f * (float)x + 1.0f) * fPicSize;
iy = 1.0f - (2.0f * (float)y + 1.0f) * fPicSize;
ix = 1.0f;
break;
case 1: // Negative X
iz = -1.0f + (2.0f * (float)x + 1.0f) * fPicSize;
iy = 1.0f - (2.0f * (float)y + 1.0f) * fPicSize;
ix = -1;
break;
case 2: // Positive Y
iz = -1.0f + (2.0f * (float)y + 1.0f) * fPicSize;
iy = 1.0f;
ix = -1.0f + (2.0f * (float)x + 1.0f) * fPicSize;
break;
case 3: // Negative Y
iz = 1.0f - (2.0f * (float)y + 1.0f) * fPicSize;
iy = -1.0f;
ix = -1.0f + (2.0f * (float)x + 1.0f) * fPicSize;
break;
case 4: // Positive Z
iz = 1.0f;
iy = 1.0f - (2.0f * (float)y + 1.0f) * fPicSize;
ix = -1.0f + (2.0f * (float)x + 1.0f) * fPicSize;
break;
case 5: // Negative Z
iz = -1.0f;
iy = 1.0f - (2.0f * (float)y + 1.0f) * fPicSize;
ix = 1.0f - (2.0f * (float)x + 1.0f) * fPicSize;
break;
default:
ix = iy = iz = 0.f;
assert(false);
break;
}
XMVECTOR dir = XMVectorSet( ix, iy, iz, 0 );
dir = XMVector3Normalize( dir );
const float fDiffSolid = 4.0f/((1.0f + fU*fU + fV*fV)*sqrtf(1.0f + fU*fU+fV*fV));
fWt += fDiffSolid;
XMSHEvalDirection(shBuff,order,dir);
XMFLOAT3A clr;
XMStoreFloat3A( &clr, *pixel );
if ( resultR ) XMSHAdd(resultR,order,resultR, XMSHScale(shBuffB,order,shBuff,clr.x*fDiffSolid) );
if ( resultG ) XMSHAdd(resultG,order,resultG, XMSHScale(shBuffB,order,shBuff,clr.y*fDiffSolid) );
if ( resultB ) XMSHAdd(resultB,order,resultB, XMSHScale(shBuffB,order,shBuff,clr.z*fDiffSolid) );
}
pSrc += mapped.RowPitch;
}
context->Unmap( texture, dindex );
}
const float fNormProj = (4.0f*XM_PI)/fWt;
if ( resultR ) XMSHScale(resultR,order,resultR,fNormProj);
if ( resultG ) XMSHScale(resultG,order,resultG,fNormProj);
if ( resultB ) XMSHScale(resultB,order,resultB,fNormProj);
return S_OK;
}
}; // namespace DirectX
//-------------------------------------------------------------------------------------
// DirectXSHD3D11.cpp -- C++ Spherical Harmonics Math Library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/p/?LinkId=262885
//-------------------------------------------------------------------------------------
#include "DirectXSH.h"
#include <d3d11.h>
#include <DirectXPackedVector.h>
#include <assert.h>
#include <memory>
#include <malloc.h>
namespace
{
struct aligned_deleter { void operator()(void* p) { _aligned_free(p); } };
typedef std::unique_ptr<DirectX::XMVECTOR, aligned_deleter> ScopedAlignedArrayXMVECTOR;
template<class T> class ScopedObject
{
public:
explicit ScopedObject( T *p = 0 ) : _pointer(p) {}
~ScopedObject()
{
if ( _pointer )
{
_pointer->Release();
_pointer = nullptr;
}
}
bool IsNull() const { return (!_pointer); }
T& operator*() { return *_pointer; }
T* operator->() { return _pointer; }
T** operator&() { return &_pointer; }
void Reset(T *p = 0) { if ( _pointer ) { _pointer->Release(); } _pointer = p; }
T* Get() const { return _pointer; }
private:
ScopedObject(const ScopedObject&);
ScopedObject& operator=(const ScopedObject&);
T* _pointer;
};
//-------------------------------------------------------------------------------------
// This code is lifted from DirectXTex http://directxtex.codeplex.com/
// If you need additional DXGI format support, see DirectXTexConvert.cpp
//-------------------------------------------------------------------------------------
// Decode a scanline of 'type' texels using the DirectXMath loader 'func'.
// Reads while a whole texel remains in the 'size'-byte source buffer and the
// destination range [dPtr, ePtr) is not full; returns true if the buffer held
// at least one texel. Expands inside _LoadScanline, which supplies
// pSource/size/dPtr/ePtr. (Comments must stay outside the macro body because
// line splicing happens before comment processing.)
#define LOAD_SCANLINE( type, func )\
        if ( size >= sizeof(type) )\
        {\
            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
            {\
                if ( dPtr >= ePtr ) break;\
                *(dPtr++) = func( sPtr++ );\
            }\
            return true;\
        }\
        return false;

// As LOAD_SCANLINE, but for 3-channel texels: the loaded XYZ channels are
// combined with the W channel of 'defvec' via g_XMSelect1110 (callers pass
// g_XMIdentityR3 so the missing alpha defaults to 1)
#define LOAD_SCANLINE3( type, func, defvec )\
        if ( size >= sizeof(type) )\
        {\
            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
            {\
                XMVECTOR v = func( sPtr++ );\
                if ( dPtr >= ePtr ) break;\
                *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\
            }\
            return true;\
        }\
        return false;

// As LOAD_SCANLINE, but for 2-channel texels: the loaded XY channels are
// combined with the ZW channels of 'defvec' via g_XMSelect1100
#define LOAD_SCANLINE2( type, func, defvec )\
        if ( size >= sizeof(type) )\
        {\
            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
            {\
                XMVECTOR v = func( sPtr++ );\
                if ( dPtr >= ePtr ) break;\
                *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\
            }\
            return true;\
        }\
        return false;
#pragma warning(push)
#pragma warning(disable : 6101)
//-------------------------------------------------------------------------------------
// Decodes one scanline of texels in the given DXGI float format into an array of
// RGBA XMVECTORs (missing channels are defaulted; alpha defaults to 1).
//
// pDestination - 16-byte aligned output array receiving up to 'count' vectors
// count        - number of texels to decode
// pSource      - source scanline bytes
// size         - number of valid bytes at pSource (typically the row pitch)
// format       - DXGI format of the source data; must be one of the cases below
//
// Returns true if the format is supported and the buffer held at least one texel.
//-------------------------------------------------------------------------------------
_Success_(return)
static bool _LoadScanline( _Out_writes_(count) DirectX::XMVECTOR* pDestination, _In_ size_t count,
                           _In_reads_bytes_(size) LPCVOID pSource, _In_ size_t size, _In_ DXGI_FORMAT format )
{
    assert( pDestination && count > 0 && (((uintptr_t)pDestination & 0xF) == 0) );
    assert( pSource && size > 0 );

    using namespace DirectX;
    using namespace DirectX::PackedVector;

    XMVECTOR* __restrict dPtr = pDestination;
    if ( !dPtr )
        return false;

    const XMVECTOR* ePtr = pDestination + count;

    switch( format )
    {
    case DXGI_FORMAT_R32G32B32A32_FLOAT:
        {
            // Same layout as XMVECTOR: direct copy, clamped to the smaller of
            // the source buffer and the destination array
            size_t msize = (size > (sizeof(XMVECTOR)*count)) ? (sizeof(XMVECTOR)*count) : size;
            memcpy_s( dPtr, sizeof(XMVECTOR)*count, pSource, msize );
        }
        return true;

    case DXGI_FORMAT_R32G32B32_FLOAT:
        LOAD_SCANLINE3( XMFLOAT3, XMLoadFloat3, g_XMIdentityR3 )

    case DXGI_FORMAT_R16G16B16A16_FLOAT:
        LOAD_SCANLINE( XMHALF4, XMLoadHalf4 )

    case DXGI_FORMAT_R32G32_FLOAT:
        LOAD_SCANLINE2( XMFLOAT2, XMLoadFloat2, g_XMIdentityR3 )

    case DXGI_FORMAT_R11G11B10_FLOAT:
        LOAD_SCANLINE3( XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3 )

    case DXGI_FORMAT_R16G16_FLOAT:
        LOAD_SCANLINE2( XMHALF2, XMLoadHalf2, g_XMIdentityR3 )

    case DXGI_FORMAT_R32_FLOAT:
        if ( size >= sizeof(float) )
        {
            const float* __restrict sPtr = reinterpret_cast<const float*>(pSource);
            // Only read while a whole float remains in the buffer. The previous
            // bound (icount < size) could read up to 3 bytes past the end of the
            // buffer when 'size' is not a multiple of sizeof(float); this now
            // matches the guard used by the LOAD_SCANLINE* macros.
            for( size_t icount = 0; icount < ( size - sizeof(float) + 1 ); icount += sizeof(float) )
            {
                XMVECTOR v = XMLoadFloat( sPtr++ );
                if ( dPtr >= ePtr ) break;
                *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1000 );
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R16_FLOAT:
        if ( size >= sizeof(HALF) )
        {
            const HALF * __restrict sPtr = reinterpret_cast<const HALF*>(pSource);
            // Only read while a whole HALF remains (see R32_FLOAT note above)
            for( size_t icount = 0; icount < ( size - sizeof(HALF) + 1 ); icount += sizeof(HALF) )
            {
                if ( dPtr >= ePtr ) break;
                *(dPtr++) = XMVectorSet( XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f );
            }
            return true;
        }
        return false;

    default:
        return false;
    }
}
#pragma warning(pop)
}; // namespace anonymous
namespace DirectX
{
//-------------------------------------------------------------------------------------
// Projects a function represented in a cube map into spherical harmonics.
//
// http://msdn.microsoft.com/en-us/library/windows/desktop/ff476300.aspx
//-------------------------------------------------------------------------------------
// context     - D3D11 device context used to copy (if needed) and Map the texture
// order       - SH order to evaluate, in [XM_SH_MINORDER, XM_SH_MAXORDER]
// cubeMap     - cube map texture: ArraySize == 6, square, non-multisampled, and
//               one of the float DXGI formats supported by _LoadScanline
// resultR/G/B - optional order*order output arrays receiving the SH projection
//               of the red/green/blue channels (any may be null)
HRESULT SHProjectCubeMap( _In_ ID3D11DeviceContext *context,
                          _In_ size_t order,
                          _In_ ID3D11Texture2D *cubeMap,
                          _Out_writes_opt_(order*order) float *resultR,
                          _Out_writes_opt_(order*order) float *resultG,
                          _Out_writes_opt_(order*order) float* resultB )
{
    if ( !context || !cubeMap )
        return E_INVALIDARG;

    if ( order < XM_SH_MINORDER || order > XM_SH_MAXORDER )
        return E_INVALIDARG;

    D3D11_TEXTURE2D_DESC desc;
    cubeMap->GetDesc( &desc );

    // Require a square, non-multisampled cube map (6-element texture array)
    if ( (desc.ArraySize != 6)
         || (desc.Width != desc.Height)
         || (desc.SampleDesc.Count > 1) )
        return E_FAIL;

    switch( desc.Format )
    {
    case DXGI_FORMAT_R32G32B32A32_FLOAT:
    case DXGI_FORMAT_R32G32B32_FLOAT:
    case DXGI_FORMAT_R16G16B16A16_FLOAT:
    case DXGI_FORMAT_R32G32_FLOAT:
    case DXGI_FORMAT_R11G11B10_FLOAT:
    case DXGI_FORMAT_R16G16_FLOAT:
    case DXGI_FORMAT_R32_FLOAT:
    case DXGI_FORMAT_R16_FLOAT:
        // See _LoadScanline to support more pixel formats
        break;

    default:
        return E_FAIL;
    }

    //--- Create a staging resource copy (if needed) to be able to read data
    ID3D11Texture2D* texture = nullptr;

    ScopedObject<ID3D11Texture2D> staging;
    if ( !(desc.CPUAccessFlags & D3D11_CPU_ACCESS_READ) )
    {
        // Source is not CPU-readable: copy it into a STAGING texture we can Map
        D3D11_TEXTURE2D_DESC sdesc = desc;
        sdesc.BindFlags = 0;
        sdesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
        sdesc.Usage = D3D11_USAGE_STAGING;

        ScopedObject<ID3D11Device> device;
        context->GetDevice( &device );
        assert( !device.IsNull() );

        HRESULT hr = device->CreateTexture2D( &sdesc, nullptr, &staging );
        if ( FAILED(hr) )
            return hr;

        context->CopyResource( staging.Get(), cubeMap );

        texture = staging.Get();
    }
    else
        texture = cubeMap;

    assert( texture != 0 );

    //--- Setup for SH projection
    // Scratch buffer holding one decoded scanline of RGBA texels
    // (16-byte aligned as required for XMVECTOR)
    ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( sizeof(XMVECTOR)*desc.Width, 16 ) ) );
    if ( !scanline )
        return E_OUTOFMEMORY;

    assert( desc.Width > 0 );
    float fSize = static_cast<float>( desc.Width );
    float fPicSize = 1.0f / fSize;

    // index from [0,W-1], f(0) maps to -1 + 1/W, f(W-1) maps to 1 - 1/w
    // linear function x*S +B, 1st constraint means B is (-1+1/W), plug into
    // second and solve for S: S = 2*(1-1/W)/(W-1). The old code that did
    // this was incorrect - but only for computing the differential solid
    // angle, where the final value was 1.0 instead of 1-1/w...
    float fB = -1.0f + 1.0f/fSize;
    float fS = ( desc.Width > 1 ) ? (2.0f*(1.0f-1.0f/fSize)/(fSize-1.0f)) : 0.f;

    // clear out accumulation variables
    float fWt = 0.0f;  // running sum of differential solid angles, for final normalization

    if ( resultR )
        memset( resultR, 0, sizeof(float)*order*order );
    if ( resultG )
        memset( resultG, 0, sizeof(float)*order*order );
    if ( resultB )
        memset( resultB, 0, sizeof(float)*order*order );

    // Per-texel SH basis evaluation and its per-channel scaled copy
    float shBuff[XM_SH_MAXORDER*XM_SH_MAXORDER];
    float shBuffB[XM_SH_MAXORDER*XM_SH_MAXORDER];

    //--- Process each face of the cubemap
    for (UINT face=0; face < 6; ++face )
    {
        // Top mip level of this cube face
        UINT dindex = D3D11CalcSubresource( 0, face, desc.MipLevels );

        D3D11_MAPPED_SUBRESOURCE mapped;
        HRESULT hr = context->Map( texture, dindex, D3D11_MAP_READ, 0, &mapped );
        if ( FAILED(hr) )
            return hr;

        const uint8_t *pSrc = reinterpret_cast<const uint8_t*>(mapped.pData);
        for( UINT y=0; y < desc.Height; ++y )
        {
            XMVECTOR* ptr = scanline.get();
            // Decode this row into RGBA float vectors
            if ( !_LoadScanline( ptr, desc.Width, pSrc, mapped.RowPitch, desc.Format ) )
            {
                context->Unmap( texture, dindex );
                return E_FAIL;
            }

            const float fV = y*fS + fB;

            XMVECTOR* pixel = ptr;
            for( UINT x=0; x < desc.Width; ++x, ++pixel )
            {
                const float fU = x*fS + fB;

                // Direction through the center of texel (x,y) for this face,
                // following the D3D cube map face orientations
                float ix, iy, iz;
                switch( face )
                {
                case 0: // Positive X
                    iz = 1.0f - (2.0f * (float)x + 1.0f) * fPicSize;
                    iy = 1.0f - (2.0f * (float)y + 1.0f) * fPicSize;
                    ix = 1.0f;
                    break;

                case 1: // Negative X
                    iz = -1.0f + (2.0f * (float)x + 1.0f) * fPicSize;
                    iy = 1.0f - (2.0f * (float)y + 1.0f) * fPicSize;
                    ix = -1;
                    break;

                case 2: // Positive Y
                    iz = -1.0f + (2.0f * (float)y + 1.0f) * fPicSize;
                    iy = 1.0f;
                    ix = -1.0f + (2.0f * (float)x + 1.0f) * fPicSize;
                    break;

                case 3: // Negative Y
                    iz = 1.0f - (2.0f * (float)y + 1.0f) * fPicSize;
                    iy = -1.0f;
                    ix = -1.0f + (2.0f * (float)x + 1.0f) * fPicSize;
                    break;

                case 4: // Positive Z
                    iz = 1.0f;
                    iy = 1.0f - (2.0f * (float)y + 1.0f) * fPicSize;
                    ix = -1.0f + (2.0f * (float)x + 1.0f) * fPicSize;
                    break;

                case 5: // Negative Z
                    iz = -1.0f;
                    iy = 1.0f - (2.0f * (float)y + 1.0f) * fPicSize;
                    ix = 1.0f - (2.0f * (float)x + 1.0f) * fPicSize;
                    break;

                default:
                    ix = iy = iz = 0.f;
                    assert(false);
                    break;
                }

                XMVECTOR dir = XMVectorSet( ix, iy, iz, 0 );
                dir = XMVector3Normalize( dir );

                // Differential solid angle subtended by this texel
                const float fDiffSolid = 4.0f/((1.0f + fU*fU + fV*fV)*sqrtf(1.0f + fU*fU+fV*fV));
                fWt += fDiffSolid;

                // Evaluate the SH basis in this direction, then accumulate each
                // color channel weighted by the texel's solid angle
                XMSHEvalDirection(shBuff,order,dir);

                XMFLOAT3A clr;
                XMStoreFloat3A( &clr, *pixel );

                if ( resultR ) XMSHAdd(resultR,order,resultR, XMSHScale(shBuffB,order,shBuff,clr.x*fDiffSolid) );
                if ( resultG ) XMSHAdd(resultG,order,resultG, XMSHScale(shBuffB,order,shBuff,clr.y*fDiffSolid) );
                if ( resultB ) XMSHAdd(resultB,order,resultB, XMSHScale(shBuffB,order,shBuff,clr.z*fDiffSolid) );
            }

            pSrc += mapped.RowPitch;
        }

        context->Unmap( texture, dindex );
    }

    // Normalize by the accumulated solid angle so the projection corresponds
    // to an integral over the full sphere (4*pi steradians)
    const float fNormProj = (4.0f*XM_PI)/fWt;

    if ( resultR ) XMSHScale(resultR,order,resultR,fNormProj);
    if ( resultG ) XMSHScale(resultG,order,resultG,fNormProj);
    if ( resultB ) XMSHScale(resultB,order,resultB,fNormProj);

    return S_OK;
}
}; // namespace DirectX

File diff suppressed because it is too large Load Diff