Mirror of https://github.com/microsoft/DirectXMath
Synced 2024-11-08 21:50:09 +00:00

DirectXMath 3.03

commit fd7f30458d (parent 409c3a3646)
@@ -17,7 +17,7 @@
#error DirectX Math requires C++
#endif

#define DIRECTX_MATH_VERSION 302
#define DIRECTX_MATH_VERSION 303

#if !defined(_XM_BIGENDIAN_) && !defined(_XM_LITTLEENDIAN_)
#if defined(_M_AMD64) || defined(_M_IX86) || defined(_M_ARM)
@@ -29,6 +29,8 @@
#endif
#endif // !_XM_BIGENDIAN_ && !_XM_LITTLEENDIAN_



#if !defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_) && !defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#if defined(_M_IX86) || defined(_M_AMD64)
#define _XM_SSE_INTRINSICS_
@@ -62,15 +64,7 @@
#endif
#endif

#ifdef _WIN32_WCE
inline float powf(float _X, float _Y) { return ((float)pow((double)_X, (double)_Y)); }
inline float logf(float _X) { return ((float)log((double)_X)); }
inline float tanf(float _X) { return ((float)tan((double)_X)); }
inline float atanf(float _X) { return ((float)atan((double)_X)); }
inline float sinhf(float _X) { return ((float)sinh((double)_X)); }
inline float coshf(float _X) { return ((float)cosh((double)_X)); }
inline float tanhf(float _X) { return ((float)tanh((double)_X)); }
#endif


#include <sal.h>
#include <assert.h>
@@ -261,8 +255,8 @@ __declspec(align(16)) struct XMVECTORF32
inline operator XMVECTOR() const { return v; }
inline operator const float*() const { return f; }
#if !defined(_XM_NO_INTRINSICS_) && defined(_XM_SSE_INTRINSICS_)
inline operator __m128i() const { return reinterpret_cast<const __m128i *>(&v)[0]; }
inline operator __m128d() const { return reinterpret_cast<const __m128d *>(&v)[0]; }
inline operator __m128i() const { return _mm_castps_si128(v); }
inline operator __m128d() const { return _mm_castps_pd(v); }
#endif
};
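
A note on the pattern of this change (a minimal sketch, not part of the diff; the helper name AsInt is hypothetical): the old operators reinterpreted the vector through a pointer to a local, which forces a round trip through memory and relies on type punning; the _mm_castps_si128/_mm_castps_pd intrinsics are pure register reinterpretations that generate no instructions.

// Sketch assuming SSE2 (<emmintrin.h>); AsInt is an illustrative name.
#include <emmintrin.h>

inline __m128i AsInt(__m128 v)
{
    // Old pattern: casts through memory and skirts strict aliasing:
    //   return reinterpret_cast<const __m128i *>(&v)[0];
    // New pattern: zero-cost bitcast of the same 128-bit register.
    return _mm_castps_si128(v);
}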

@@ -276,8 +270,8 @@ __declspec(align(16)) struct XMVECTORI32

inline operator XMVECTOR() const { return v; }
#if !defined(_XM_NO_INTRINSICS_) && defined(_XM_SSE_INTRINSICS_)
inline operator __m128i() const { return reinterpret_cast<const __m128i *>(&v)[0]; }
inline operator __m128d() const { return reinterpret_cast<const __m128d *>(&v)[0]; }
inline operator __m128i() const { return _mm_castps_si128(v); }
inline operator __m128d() const { return _mm_castps_pd(v); }
#endif
};

@@ -291,8 +285,8 @@ __declspec(align(16)) struct XMVECTORU8

inline operator XMVECTOR() const { return v; }
#if !defined(_XM_NO_INTRINSICS_) && defined(_XM_SSE_INTRINSICS_)
inline operator __m128i() const { return reinterpret_cast<const __m128i *>(&v)[0]; }
inline operator __m128d() const { return reinterpret_cast<const __m128d *>(&v)[0]; }
inline operator __m128i() const { return _mm_castps_si128(v); }
inline operator __m128d() const { return _mm_castps_pd(v); }
#endif
};

@@ -306,8 +300,8 @@ __declspec(align(16)) struct XMVECTORU32

inline operator XMVECTOR() const { return v; }
#if !defined(_XM_NO_INTRINSICS_) && defined(_XM_SSE_INTRINSICS_)
inline operator __m128i() const { return reinterpret_cast<const __m128i *>(&v)[0]; }
inline operator __m128d() const { return reinterpret_cast<const __m128d *>(&v)[0]; }
inline operator __m128i() const { return _mm_castps_si128(v); }
inline operator __m128d() const { return _mm_castps_pd(v); }
#endif
};

@@ -350,6 +344,7 @@ struct XMMATRIX
__declspec(align(16)) struct XMMATRIX
#endif
{
#ifdef _XM_NO_INTRINSICS_
union
{
XMVECTOR r[4];
@@ -362,6 +357,9 @@ __declspec(align(16)) struct XMMATRIX
};
float m[4][4];
};
#else
XMVECTOR r[4];
#endif

XMMATRIX() {}
XMMATRIX(FXMVECTOR R0, FXMVECTOR R1, FXMVECTOR R2, GXMVECTOR R3) { r[0] = R0; r[1] = R1; r[2] = R2; r[3] = R3; }
@@ -371,8 +369,10 @@ __declspec(align(16)) struct XMMATRIX
float m30, float m31, float m32, float m33);
explicit XMMATRIX(_In_reads_(16) const float *pArray);

#ifdef _XM_NO_INTRINSICS_
float operator() (size_t Row, size_t Column) const { return m[Row][Column]; }
float& operator() (size_t Row, size_t Column) { return m[Row][Column]; }
#endif

XMMATRIX& operator= (const XMMATRIX& M) { r[0] = M.r[0]; r[1] = M.r[1]; r[2] = M.r[2]; r[3] = M.r[3]; return *this; }

@@ -403,7 +403,7 @@ struct XMFLOAT2

XMFLOAT2() {}
XMFLOAT2(float _x, float _y) : x(_x), y(_y) {}
XMFLOAT2(_In_reads_(2) const float *pArray) : x(pArray[0]), y(pArray[1]) {}
explicit XMFLOAT2(_In_reads_(2) const float *pArray) : x(pArray[0]), y(pArray[1]) {}

XMFLOAT2& operator= (const XMFLOAT2& Float2) { x = Float2.x; y = Float2.y; return *this; }
};
@@ -413,7 +413,7 @@ __declspec(align(16)) struct XMFLOAT2A : public XMFLOAT2
{
XMFLOAT2A() : XMFLOAT2() {}
XMFLOAT2A(float _x, float _y) : XMFLOAT2(_x, _y) {}
XMFLOAT2A(_In_reads_(2) const float *pArray) : XMFLOAT2(pArray) {}
explicit XMFLOAT2A(_In_reads_(2) const float *pArray) : XMFLOAT2(pArray) {}

XMFLOAT2A& operator= (const XMFLOAT2A& Float2) { x = Float2.x; y = Float2.y; return *this; }
};
@@ -455,7 +455,7 @@ struct XMFLOAT3

XMFLOAT3() {}
XMFLOAT3(float _x, float _y, float _z) : x(_x), y(_y), z(_z) {}
XMFLOAT3(_In_reads_(3) const float *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]) {}
explicit XMFLOAT3(_In_reads_(3) const float *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]) {}

XMFLOAT3& operator= (const XMFLOAT3& Float3) { x = Float3.x; y = Float3.y; z = Float3.z; return *this; }
};
@@ -465,7 +465,7 @@ __declspec(align(16)) struct XMFLOAT3A : public XMFLOAT3
{
XMFLOAT3A() : XMFLOAT3() {}
XMFLOAT3A(float _x, float _y, float _z) : XMFLOAT3(_x, _y, _z) {}
XMFLOAT3A(_In_reads_(3) const float *pArray) : XMFLOAT3(pArray) {}
explicit XMFLOAT3A(_In_reads_(3) const float *pArray) : XMFLOAT3(pArray) {}

XMFLOAT3A& operator= (const XMFLOAT3A& Float3) { x = Float3.x; y = Float3.y; z = Float3.z; return *this; }
};
@@ -482,7 +482,7 @@ struct XMINT3
XMINT3(int32_t _x, int32_t _y, int32_t _z) : x(_x), y(_y), z(_z) {}
explicit XMINT3(_In_reads_(3) const int32_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]) {}

XMINT3& operator= (const XMINT3& Int3) { x = Int3.x; y = Int3.y; z = Int3.z; return *this; }
XMINT3& operator= (const XMINT3& i3) { x = i3.x; y = i3.y; z = i3.z; return *this; }
};

// 3D Vector; 32 bit unsigned integer components
@@ -496,7 +496,7 @@ struct XMUINT3
XMUINT3(uint32_t _x, uint32_t _y, uint32_t _z) : x(_x), y(_y), z(_z) {}
explicit XMUINT3(_In_reads_(3) const uint32_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]) {}

XMUINT3& operator= (const XMUINT3& UInt3) { x = UInt3.x; y = UInt3.y; z = UInt3.z; return *this; }
XMUINT3& operator= (const XMUINT3& u3) { x = u3.x; y = u3.y; z = u3.z; return *this; }
};

//------------------------------------------------------------------------------
@@ -510,7 +510,7 @@ struct XMFLOAT4

XMFLOAT4() {}
XMFLOAT4(float _x, float _y, float _z, float _w) : x(_x), y(_y), z(_z), w(_w) {}
XMFLOAT4(_In_reads_(4) const float *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
explicit XMFLOAT4(_In_reads_(4) const float *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}

XMFLOAT4& operator= (const XMFLOAT4& Float4) { x = Float4.x; y = Float4.y; z = Float4.z; w = Float4.w; return *this; }
};
@@ -520,7 +520,7 @@ __declspec(align(16)) struct XMFLOAT4A : public XMFLOAT4
{
XMFLOAT4A() : XMFLOAT4() {}
XMFLOAT4A(float _x, float _y, float _z, float _w) : XMFLOAT4(_x, _y, _z, _w) {}
XMFLOAT4A(_In_reads_(4) const float *pArray) : XMFLOAT4(pArray) {}
explicit XMFLOAT4A(_In_reads_(4) const float *pArray) : XMFLOAT4(pArray) {}

XMFLOAT4A& operator= (const XMFLOAT4A& Float4) { x = Float4.x; y = Float4.y; z = Float4.z; w = Float4.w; return *this; }
};
@@ -1368,6 +1368,8 @@ template<class T> inline T XMMax(T a, T b) { return (a > b) ? a : b; }

#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)

#define XM_PERMUTE_PS( v, c ) _mm_shuffle_ps( v, v, c )

// PermuteHelper internal template (SSE only)
namespace Internal
{
@@ -1384,8 +1386,8 @@ namespace Internal
WhichW ? 0xFFFFFFFF : 0,
};

XMVECTOR shuffled1 = _mm_shuffle_ps(v1, v1, Shuffle);
XMVECTOR shuffled2 = _mm_shuffle_ps(v2, v2, Shuffle);
XMVECTOR shuffled1 = XM_PERMUTE_PS(v1, Shuffle);
XMVECTOR shuffled2 = XM_PERMUTE_PS(v2, Shuffle);

XMVECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1);
XMVECTOR masked2 = _mm_and_ps(selectMask, shuffled2);
@@ -1397,13 +1399,13 @@ namespace Internal
// Fast path for permutes that only read from the first vector.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, false, false>
{
static XMVECTOR Permute(FXMVECTOR v1, FXMVECTOR v2) { (v2); return _mm_shuffle_ps(v1, v1, Shuffle); }
static XMVECTOR Permute(FXMVECTOR v1, FXMVECTOR v2) { (v2); return XM_PERMUTE_PS(v1, Shuffle); }
};

// Fast path for permutes that only read from the second vector.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, true, true>
{
static XMVECTOR Permute(FXMVECTOR v1, FXMVECTOR v2){ (v1); return _mm_shuffle_ps(v2, v2, Shuffle); }
static XMVECTOR Permute(FXMVECTOR v1, FXMVECTOR v2){ (v1); return XM_PERMUTE_PS(v2, Shuffle); }
};
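
The new XM_PERMUTE_PS macro introduced above centralizes the common "shuffle a vector with itself" idiom; the bulk of this commit mechanically rewrites those call sites. A minimal usage sketch (not part of the diff; SplatY is an illustrative name, and the macro from this header is assumed to be in scope):

#include <xmmintrin.h>

inline __m128 SplatY(__m128 v)
{
    // Identical today to _mm_shuffle_ps(v, v, _MM_SHUFFLE(1,1,1,1)),
    // but routed through one macro so a later target can remap the
    // single-source permute in one place.
    return XM_PERMUTE_PS(v, _MM_SHUFFLE(1, 1, 1, 1));
}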

// Fast path for permutes that read XY from the first vector, ZW from the second.
@@ -1488,7 +1490,7 @@ template<> inline XMVECTOR XMVectorPermute<1,2,3,4>(FXMVECTOR V1, FXMVECTOR V2)
template<> inline XMVECTOR XMVectorPermute<2,3,4,5>(FXMVECTOR V1, FXMVECTOR V2) { return vextq_f32(V1, V2, 2); }
template<> inline XMVECTOR XMVectorPermute<3,4,5,6>(FXMVECTOR V1, FXMVECTOR V2) { return vextq_f32(V1, V2, 3); }

#endif _XM_ARM_NEON_INTRINSICS_ && !_XM_NO_INTRINSICS_
#endif // _XM_ARM_NEON_INTRINSICS_ && !_XM_NO_INTRINSICS_

//------------------------------------------------------------------------------

@@ -1502,7 +1504,7 @@ template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t Swizz
static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");

#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
return _mm_shuffle_ps( V, V, _MM_SHUFFLE( SwizzleW, SwizzleZ, SwizzleY, SwizzleX ) );
return XM_PERMUTE_PS( V, _MM_SHUFFLE( SwizzleW, SwizzleZ, SwizzleY, SwizzleX ) );
#elif defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
return __vpermwi(V, ((SwizzleX & 3) << 6) | ((SwizzleY & 3) << 4) | ((SwizzleZ & 3) << 2) | (SwizzleW & 3) );
#else
@@ -1515,6 +1517,7 @@ template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t Swizz
// Specialized swizzles
template<> inline XMVECTOR XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; }


#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)

template<> inline XMVECTOR XMVectorSwizzle<0,0,0,0>(FXMVECTOR V) { return vdupq_lane_f32( vget_low_f32(V), 0); }
@@ -1548,7 +1551,7 @@ template<> inline XMVECTOR XMVectorSwizzle<1,2,3,0>(FXMVECTOR V) { return vextq_
template<> inline XMVECTOR XMVectorSwizzle<2,3,0,1>(FXMVECTOR V) { return vextq_f32(V, V, 2); }
template<> inline XMVECTOR XMVectorSwizzle<3,0,1,2>(FXMVECTOR V) { return vextq_f32(V, V, 3); }

#endif _XM_ARM_NEON_INTRINSICS_ && !_XM_NO_INTRINSICS_
#endif // _XM_ARM_NEON_INTRINSICS_ && !_XM_NO_INTRINSICS_

//------------------------------------------------------------------------------

@@ -1760,7 +1763,7 @@ inline XMVECTOR XMVectorSetBinaryConstant(uint32_t C0, uint32_t C1, uint32_t C2,
vTemp = _mm_cmpeq_epi32(vTemp,g_vMask1);
// 0xFFFFFFFF -> 1.0f, 0x00000000 -> 0.0f
vTemp = _mm_and_si128(vTemp,g_XMOne);
return reinterpret_cast<const __m128 *>(&vTemp)[0];
return _mm_castsi128_ps(vTemp);
#endif
}

@@ -1799,7 +1802,7 @@ inline XMVECTOR XMVectorSplatConstant(int32_t IntConstant, uint32_t DivExponent)
// Splat the scalar value (It's really a float)
vScale = _mm_set1_epi32(uScale);
// Multiply by the reciprocal (Perform a right shift by DivExponent)
vResult = _mm_mul_ps(vResult,reinterpret_cast<const __m128 *>(&vScale)[0]);
vResult = _mm_mul_ps(vResult,_mm_castsi128_ps(vScale));
return vResult;
#endif
}
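
The scale constant used by XMVectorSplatConstant builds 1.0f/(1 << DivExponent) directly in the float bit pattern: subtracting DivExponent from the biased exponent field (bits 23..30 of 0x3F800000, i.e. 1.0f) halves the value per step. A scalar sketch of just that trick (not part of the diff; ReciprocalPow2 is an illustrative name):

#include <cstdint>
#include <cstring>

inline float ReciprocalPow2(uint32_t DivExponent) // assumes DivExponent < 32
{
    uint32_t uScale = 0x3F800000U - (DivExponent << 23);
    float f;
    std::memcpy(&f, &uScale, sizeof(f)); // reinterpret bits; e.g. 3 -> 0.125f
    return f;
}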
@@ -1824,13 +1827,14 @@ inline XMVECTOR XMVectorSplatConstantInt(int32_t IntConstant)
}

// Implemented for VMX128 intrinsics as #defines above
#endif _XM_NO_INTRINSICS_ || _XM_SSE_INTRINSICS_ || _XM_ARM_NEON_INTRINSICS_
#endif // _XM_NO_INTRINSICS_ || _XM_SSE_INTRINSICS_ || _XM_ARM_NEON_INTRINSICS_

#include "DirectXMathConvert.inl"
#include "DirectXMathVector.inl"
#include "DirectXMathMatrix.inl"
#include "DirectXMathMisc.inl"


#pragma prefast(pop)
#pragma warning(pop)

@@ -50,12 +50,12 @@ inline XMVECTOR XMConvertVectorIntToFloat
return vmulq_f32( vResult, vScale );
#else // _XM_SSE_INTRINSICS_
// Convert to floats
XMVECTOR vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&VInt)[0]);
XMVECTOR vResult = _mm_cvtepi32_ps(_mm_castps_si128(VInt));
// Convert DivExponent into 1.0f/(1<<DivExponent)
uint32_t uScale = 0x3F800000U - (DivExponent << 23);
// Splat the scalar value
__m128i vScale = _mm_set1_epi32(uScale);
vResult = _mm_mul_ps(vResult,reinterpret_cast<const __m128 *>(&vScale)[0]);
vResult = _mm_mul_ps(vResult,_mm_castsi128_ps(vScale));
return vResult;
#endif
}
@@ -108,7 +108,7 @@ inline XMVECTOR XMConvertVectorFloatToInt
__m128i vResulti = _mm_cvttps_epi32(vResult);
// If there was positive overflow, set to 0x7FFFFFFF
vResult = _mm_and_ps(vOverflow,g_XMAbsMask);
vOverflow = _mm_andnot_ps(vOverflow,reinterpret_cast<const __m128 *>(&vResulti)[0]);
vOverflow = _mm_andnot_ps(vOverflow,_mm_castsi128_ps(vResulti));
vOverflow = _mm_or_ps(vOverflow,vResult);
return vOverflow;
#endif
@@ -143,17 +143,17 @@ inline XMVECTOR XMConvertVectorUIntToFloat
// Force all values positive
XMVECTOR vResult = _mm_xor_ps(VUInt,vMask);
// Convert to floats
vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
// Convert 0x80000000 -> 0xFFFFFFFF
__m128i iMask = _mm_srai_epi32(reinterpret_cast<const __m128i *>(&vMask)[0],31);
__m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask),31);
// For only the ones that are too big, add the fixup
vMask = _mm_and_ps(reinterpret_cast<const __m128 *>(&iMask)[0],g_XMFixUnsigned);
vMask = _mm_and_ps(_mm_castsi128_ps(iMask),g_XMFixUnsigned);
vResult = _mm_add_ps(vResult,vMask);
// Convert DivExponent into 1.0f/(1<<DivExponent)
uint32_t uScale = 0x3F800000U - (DivExponent << 23);
// Splat
iMask = _mm_set1_epi32(uScale);
vResult = _mm_mul_ps(vResult,reinterpret_cast<const __m128 *>(&iMask)[0]);
vResult = _mm_mul_ps(vResult,_mm_castsi128_ps(iMask));
return vResult;
#endif
}
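
The unsigned path above works around SSE2 having only a signed int-to-float convert: it strips the high bit before converting, then adds 2^31 (g_XMFixUnsigned) back for lanes that had it set. A scalar model of that sequence (a sketch, not part of the diff; UIntToFloat is an illustrative name):

#include <cstdint>

inline float UIntToFloat(uint32_t u)
{
    uint32_t high = u & 0x80000000U;  // vMask: lanes whose value is >= 2^31
    // Signed convert of the value with the top bit cleared.
    float f = static_cast<float>(static_cast<int32_t>(u ^ high));
    if (high)
        f += 2147483648.0f;           // g_XMFixUnsigned fixup: add 2^31 back
    return f;
}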
@@ -213,7 +213,7 @@ inline XMVECTOR XMConvertVectorFloatToUInt
__m128i vResulti = _mm_cvttps_epi32(vResult);
// Convert from signed to unsigned only if greater than 0x80000000
vMask = _mm_and_ps(vMask,g_XMNegativeZero);
vResult = _mm_xor_ps(reinterpret_cast<const __m128 *>(&vResulti)[0],vMask);
vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti),vMask);
// On those that are too large, set to 0xFFFFFFFF
vResult = _mm_or_ps(vResult,vOverflow);
return vResult;
@@ -404,7 +404,7 @@ inline XMVECTOR XMLoadSInt2
__m128 x = _mm_load_ss( reinterpret_cast<const float*>(&pSource->x) );
__m128 y = _mm_load_ss( reinterpret_cast<const float*>(&pSource->y) );
__m128 V = _mm_unpacklo_ps( x, y );
return _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&V)[0]);
return _mm_cvtepi32_ps(_mm_castps_si128(V));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
@@ -439,11 +439,11 @@ inline XMVECTOR XMLoadUInt2
// Force all values positive
XMVECTOR vResult = _mm_xor_ps(V,vMask);
// Convert to floats
vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
// Convert 0x80000000 -> 0xFFFFFFFF
__m128i iMask = _mm_srai_epi32(reinterpret_cast<const __m128i *>(&vMask)[0],31);
__m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask),31);
// For only the ones that are too big, add the fixup
vMask = _mm_and_ps(reinterpret_cast<const __m128 *>(&iMask)[0],g_XMFixUnsigned);
vMask = _mm_and_ps(_mm_castsi128_ps(iMask),g_XMFixUnsigned);
vResult = _mm_add_ps(vResult,vMask);
return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
@@ -596,7 +596,7 @@ inline XMVECTOR XMLoadSInt3
__m128 z = _mm_load_ss( reinterpret_cast<const float*>(&pSource->z) );
__m128 xy = _mm_unpacklo_ps( x, y );
__m128 V = _mm_movelh_ps( xy, z );
return _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&V)[0]);
return _mm_cvtepi32_ps(_mm_castps_si128(V));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
@@ -634,11 +634,11 @@ inline XMVECTOR XMLoadUInt3
// Force all values positive
XMVECTOR vResult = _mm_xor_ps(V,vMask);
// Convert to floats
vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
// Convert 0x80000000 -> 0xFFFFFFFF
__m128i iMask = _mm_srai_epi32(reinterpret_cast<const __m128i *>(&vMask)[0],31);
__m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask),31);
// For only the ones that are too big, add the fixup
vMask = _mm_and_ps(reinterpret_cast<const __m128 *>(&iMask)[0],g_XMFixUnsigned);
vMask = _mm_and_ps(_mm_castsi128_ps(iMask),g_XMFixUnsigned);
vResult = _mm_add_ps(vResult,vMask);
return vResult;

@@ -792,15 +792,15 @@ inline XMVECTOR XMLoadUInt4
__m128i V = _mm_loadu_si128( reinterpret_cast<const __m128i*>(pSource) );
// For the values that are higher than 0x7FFFFFFF, a fixup is needed
// Determine which ones need the fix.
XMVECTOR vMask = _mm_and_ps(reinterpret_cast<const __m128 *>(&V)[0],g_XMNegativeZero);
XMVECTOR vMask = _mm_and_ps(_mm_castsi128_ps(V),g_XMNegativeZero);
// Force all values positive
XMVECTOR vResult = _mm_xor_ps(reinterpret_cast<const __m128 *>(&V)[0],vMask);
XMVECTOR vResult = _mm_xor_ps(_mm_castsi128_ps(V),vMask);
// Convert to floats
vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
// Convert 0x80000000 -> 0xFFFFFFFF
__m128i iMask = _mm_srai_epi32(reinterpret_cast<const __m128i *>(&vMask)[0],31);
__m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask),31);
// For only the ones that are too big, add the fixup
vMask = _mm_and_ps(reinterpret_cast<const __m128 *>(&iMask)[0],g_XMFixUnsigned);
vMask = _mm_and_ps(_mm_castsi128_ps(iMask),g_XMFixUnsigned);
vResult = _mm_add_ps(vResult,vMask);
return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
@@ -934,7 +934,7 @@ inline XMMATRIX XMLoadFloat4x3
// vTemp2 = y2,z2,x2,x2
vTemp2 = _mm_shuffle_ps(vTemp2,vTemp1,_MM_SHUFFLE(3,3,1,0));
// vTemp2 = x2,y2,z2,z2
vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(1,1,0,2));
vTemp2 = XM_PERMUTE_PS(vTemp2,_MM_SHUFFLE(1,1,0,2));
// vTemp1 = x1,y1,z1,0
vTemp1 = _mm_and_ps(vTemp1,g_XMMask3);
// vTemp2 = x2,y2,z2,0
@@ -942,13 +942,13 @@ inline XMMATRIX XMLoadFloat4x3
// vTemp3 = x3,y3,z3,0
vTemp3 = _mm_and_ps(vTemp3,g_XMMask3);
// vTemp4i = x4,y4,z4,0
__m128i vTemp4i = _mm_srli_si128(reinterpret_cast<const __m128i *>(&vTemp4)[0],32/8);
__m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4),32/8);
// vTemp4i = x4,y4,z4,1.0f
vTemp4i = _mm_or_si128(vTemp4i,g_XMIdentityR3);
XMMATRIX M(vTemp1,
vTemp2,
vTemp3,
reinterpret_cast<const __m128 *>(&vTemp4i)[0]);
_mm_castsi128_ps(vTemp4i));
return M;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
@@ -1016,7 +1016,7 @@ inline XMMATRIX XMLoadFloat4x3A
// vTemp2 = y2,z2,x2,x2
vTemp2 = _mm_shuffle_ps(vTemp2,vTemp1,_MM_SHUFFLE(3,3,1,0));
// vTemp2 = x2,y2,z2,z2
vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(1,1,0,2));
vTemp2 = XM_PERMUTE_PS(vTemp2,_MM_SHUFFLE(1,1,0,2));
// vTemp1 = x1,y1,z1,0
vTemp1 = _mm_and_ps(vTemp1,g_XMMask3);
// vTemp2 = x2,y2,z2,0
@@ -1024,13 +1024,13 @@ inline XMMATRIX XMLoadFloat4x3A
// vTemp3 = x3,y3,z3,0
vTemp3 = _mm_and_ps(vTemp3,g_XMMask3);
// vTemp4i = x4,y4,z4,0
__m128i vTemp4i = _mm_srli_si128(reinterpret_cast<const __m128i *>(&vTemp4)[0],32/8);
__m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4),32/8);
// vTemp4i = x4,y4,z4,1.0f
vTemp4i = _mm_or_si128(vTemp4i,g_XMIdentityR3);
XMMATRIX M(vTemp1,
vTemp2,
vTemp3,
reinterpret_cast<const __m128 *>(&vTemp4i)[0]);
_mm_castsi128_ps(vTemp4i));
return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
@@ -1195,7 +1195,7 @@ inline void XMStoreInt2
__n64 VL = vget_low_u32(V);
vst1_u32( pDestination, VL );
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR T = _mm_shuffle_ps( V, V, _MM_SHUFFLE( 1, 1, 1, 1 ) );
XMVECTOR T = XM_PERMUTE_PS( V, _MM_SHUFFLE( 1, 1, 1, 1 ) );
_mm_store_ss( reinterpret_cast<float*>(&pDestination[0]), V );
_mm_store_ss( reinterpret_cast<float*>(&pDestination[1]), T );
#else // _XM_VMX128_INTRINSICS_
@@ -1219,7 +1219,7 @@ inline void XMStoreInt2A
__n64 VL = vget_low_u32(V);
vst1_u32_ex( pDestination, VL, 64 );
#elif defined(_XM_SSE_INTRINSICS_)
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), reinterpret_cast<const __m128i *>(&V)[0] );
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
@@ -1240,7 +1240,7 @@ inline void XMStoreFloat2
__n64 VL = vget_low_f32(V);
vst1_f32( reinterpret_cast<float*>(pDestination), VL );
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR T = _mm_shuffle_ps( V, V, _MM_SHUFFLE( 1, 1, 1, 1 ) );
XMVECTOR T = XM_PERMUTE_PS( V, _MM_SHUFFLE( 1, 1, 1, 1 ) );
_mm_store_ss( &pDestination->x, V );
_mm_store_ss( &pDestination->y, T );
#else // _XM_VMX128_INTRINSICS_
@@ -1264,7 +1264,7 @@ inline void XMStoreFloat2A
__n64 VL = vget_low_f32(V);
vst1_f32_ex( reinterpret_cast<float*>(pDestination), VL, 64 );
#elif defined(_XM_SSE_INTRINSICS_)
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), reinterpret_cast<const __m128i *>(&V)[0] );
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
@@ -1292,10 +1292,10 @@ inline void XMStoreSInt2
__m128i vResulti = _mm_cvttps_epi32(V);
// If there was positive overflow, set to 0x7FFFFFFF
XMVECTOR vResult = _mm_and_ps(vOverflow,g_XMAbsMask);
vOverflow = _mm_andnot_ps(vOverflow,reinterpret_cast<const __m128 *>(&vResulti)[0]);
vOverflow = _mm_andnot_ps(vOverflow,_mm_castsi128_ps(vResulti));
vOverflow = _mm_or_ps(vOverflow,vResult);
// Write two ints
XMVECTOR T = _mm_shuffle_ps( vOverflow, vOverflow, _MM_SHUFFLE( 1, 1, 1, 1 ) );
XMVECTOR T = XM_PERMUTE_PS( vOverflow, _MM_SHUFFLE( 1, 1, 1, 1 ) );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->x), vOverflow );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->y), T );
#else // _XM_VMX128_INTRINSICS_
@@ -1333,11 +1333,11 @@ inline void XMStoreUInt2
__m128i vResulti = _mm_cvttps_epi32(vResult);
// Convert from signed to unsigned only if greater than 0x80000000
vMask = _mm_and_ps(vMask,g_XMNegativeZero);
vResult = _mm_xor_ps(reinterpret_cast<const __m128 *>(&vResulti)[0],vMask);
vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti),vMask);
// On those that are too large, set to 0xFFFFFFFF
vResult = _mm_or_ps(vResult,vOverflow);
// Write two uints
XMVECTOR T = _mm_shuffle_ps( vResult, vResult, _MM_SHUFFLE( 1, 1, 1, 1 ) );
XMVECTOR T = XM_PERMUTE_PS( vResult, _MM_SHUFFLE( 1, 1, 1, 1 ) );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->x), vResult );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->y), T );
#else // _XM_VMX128_INTRINSICS_
@@ -1362,8 +1362,8 @@ inline void XMStoreInt3
vst1_u32( pDestination, VL );
vst1q_lane_u32( pDestination+2, V, 2 );
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR T1 = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
XMVECTOR T2 = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
XMVECTOR T1 = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
XMVECTOR T2 = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
_mm_store_ss( reinterpret_cast<float*>(pDestination), V );
_mm_store_ss( reinterpret_cast<float*>(&pDestination[1]), T1 );
_mm_store_ss( reinterpret_cast<float*>(&pDestination[2]), T2 );
@@ -1390,8 +1390,8 @@ inline void XMStoreInt3A
vst1_u32_ex( pDestination, VL, 64 );
vst1q_lane_u32( pDestination+2, V, 2 );
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR T = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), reinterpret_cast<const __m128i *>(&V)[0] );
XMVECTOR T = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
_mm_store_ss( reinterpret_cast<float*>(&pDestination[2]), T );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
@@ -1415,8 +1415,8 @@ inline void XMStoreFloat3
vst1_f32( reinterpret_cast<float*>(pDestination), VL );
vst1q_lane_f32( reinterpret_cast<float*>(pDestination)+2, V, 2 );
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR T1 = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
XMVECTOR T2 = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
XMVECTOR T1 = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
XMVECTOR T2 = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
_mm_store_ss( &pDestination->x, V );
_mm_store_ss( &pDestination->y, T1 );
_mm_store_ss( &pDestination->z, T2 );
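
The three-element stores above all follow one pattern: rotate lanes y and z down to lane 0 with XM_PERMUTE_PS, then issue three scalar stores so the fourth element is never written. A standalone sketch of that pattern (not part of the diff; StoreFloat3 here is an illustrative helper, not the library function):

inline void StoreFloat3(float* dst, __m128 V) // dst points at 3 writable floats
{
    __m128 T1 = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); // splat y into lane 0
    __m128 T2 = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); // splat z into lane 0
    _mm_store_ss(dst + 0, V);   // x
    _mm_store_ss(dst + 1, T1);  // y
    _mm_store_ss(dst + 2, T2);  // z
}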
@@ -1443,8 +1443,8 @@ inline void XMStoreFloat3A
vst1_f32_ex( reinterpret_cast<float*>(pDestination), VL, 64 );
vst1q_lane_f32( reinterpret_cast<float*>(pDestination)+2, V, 2 );
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR T = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), reinterpret_cast<const __m128i *>(&V)[0] );
XMVECTOR T = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
_mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
_mm_store_ss( &pDestination->z, T );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
@@ -1475,11 +1475,11 @@ inline void XMStoreSInt3
__m128i vResulti = _mm_cvttps_epi32(V);
// If there was positive overflow, set to 0x7FFFFFFF
XMVECTOR vResult = _mm_and_ps(vOverflow,g_XMAbsMask);
vOverflow = _mm_andnot_ps(vOverflow,reinterpret_cast<const __m128 *>(&vResulti)[0]);
vOverflow = _mm_andnot_ps(vOverflow,_mm_castsi128_ps(vResulti));
vOverflow = _mm_or_ps(vOverflow,vResult);
// Write 3 uints
XMVECTOR T1 = _mm_shuffle_ps(vOverflow,vOverflow,_MM_SHUFFLE(1,1,1,1));
XMVECTOR T2 = _mm_shuffle_ps(vOverflow,vOverflow,_MM_SHUFFLE(2,2,2,2));
XMVECTOR T1 = XM_PERMUTE_PS(vOverflow,_MM_SHUFFLE(1,1,1,1));
XMVECTOR T2 = XM_PERMUTE_PS(vOverflow,_MM_SHUFFLE(2,2,2,2));
_mm_store_ss( reinterpret_cast<float*>(&pDestination->x), vOverflow );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->y), T1 );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->z), T2 );
@@ -1520,12 +1520,12 @@ inline void XMStoreUInt3
__m128i vResulti = _mm_cvttps_epi32(vResult);
// Convert from signed to unsigned only if greater than 0x80000000
vMask = _mm_and_ps(vMask,g_XMNegativeZero);
vResult = _mm_xor_ps(reinterpret_cast<const __m128 *>(&vResulti)[0],vMask);
vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti),vMask);
// On those that are too large, set to 0xFFFFFFFF
vResult = _mm_or_ps(vResult,vOverflow);
// Write 3 uints
XMVECTOR T1 = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(1,1,1,1));
XMVECTOR T2 = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,2,2));
XMVECTOR T1 = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(1,1,1,1));
XMVECTOR T2 = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(2,2,2,2));
_mm_store_ss( reinterpret_cast<float*>(&pDestination->x), vResult );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->y), T1 );
_mm_store_ss( reinterpret_cast<float*>(&pDestination->z), T2 );
@@ -1550,7 +1550,7 @@ inline void XMStoreInt4
#elif defined(_XM_ARM_NEON_INTRINSICS_)
vst1q_u32( pDestination, V );
#elif defined(_XM_SSE_INTRINSICS_)
_mm_storeu_si128( reinterpret_cast<__m128i*>(pDestination), reinterpret_cast<const __m128i *>(&V)[0] );
_mm_storeu_si128( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
@@ -1573,7 +1573,7 @@ inline void XMStoreInt4A
#elif defined(_XM_ARM_NEON_INTRINSICS_)
vst1q_u32_ex( pDestination, V, 128 );
#elif defined(_XM_SSE_INTRINSICS_)
_mm_store_si128( reinterpret_cast<__m128i*>(pDestination), reinterpret_cast<const __m128i *>(&V)[0] );
_mm_store_si128( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
@@ -1649,9 +1649,9 @@ inline void XMStoreSInt4
__m128i vResulti = _mm_cvttps_epi32(V);
// If there was positive overflow, set to 0x7FFFFFFF
XMVECTOR vResult = _mm_and_ps(vOverflow,g_XMAbsMask);
vOverflow = _mm_andnot_ps(vOverflow,reinterpret_cast<const __m128 *>(&vResulti)[0]);
vOverflow = _mm_andnot_ps(vOverflow,_mm_castsi128_ps(vResulti));
vOverflow = _mm_or_ps(vOverflow,vResult);
_mm_storeu_si128( reinterpret_cast<__m128i*>(pDestination), reinterpret_cast<const __m128i *>(&vOverflow)[0] );
_mm_storeu_si128( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(vOverflow) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
@@ -1688,10 +1688,10 @@ inline void XMStoreUInt4
__m128i vResulti = _mm_cvttps_epi32(vResult);
// Convert from signed to unsigned only if greater than 0x80000000
vMask = _mm_and_ps(vMask,g_XMNegativeZero);
vResult = _mm_xor_ps(reinterpret_cast<const __m128 *>(&vResulti)[0],vMask);
vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti),vMask);
// On those that are too large, set to 0xFFFFFFFF
vResult = _mm_or_ps(vResult,vOverflow);
_mm_storeu_si128( reinterpret_cast<__m128i*>(pDestination), reinterpret_cast<const __m128i *>(&vResult)[0] );
_mm_storeu_si128( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(vResult) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
@@ -1738,7 +1738,7 @@ inline void XMStoreFloat3x3
_mm_storeu_ps(&pDestination->m[0][0],vTemp1);
vTemp2 = _mm_shuffle_ps(vTemp2,vTemp3,_MM_SHUFFLE(1,0,2,1));
_mm_storeu_ps(&pDestination->m[1][1],vTemp2);
vTemp3 = _mm_shuffle_ps(vTemp3,vTemp3,_MM_SHUFFLE(2,2,2,2));
vTemp3 = XM_PERMUTE_PS(vTemp3,_MM_SHUFFLE(2,2,2,2));
_mm_store_ss(&pDestination->m[2][2],vTemp3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_

@@ -319,10 +319,10 @@ inline XMMATRIX XMMatrixMultiply
// Use vW to hold the original row
XMVECTOR vW = M1.r[0];
// Splat the component X,Y,Z then W
XMVECTOR vX = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vY = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(1,1,1,1));
XMVECTOR vZ = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(3,3,3,3));
XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
// Perform the operation on the first row
vX = _mm_mul_ps(vX,M2.r[0]);
vY = _mm_mul_ps(vY,M2.r[1]);
@@ -335,10 +335,10 @@ inline XMMATRIX XMMatrixMultiply
mResult.r[0] = vX;
// Repeat for the other 3 rows
vW = M1.r[1];
vX = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(3,3,3,3));
vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vY = _mm_mul_ps(vY,M2.r[1]);
vZ = _mm_mul_ps(vZ,M2.r[2]);
@@ -348,10 +348,10 @@ inline XMMATRIX XMMatrixMultiply
vX = _mm_add_ps(vX,vY);
mResult.r[1] = vX;
vW = M1.r[2];
vX = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(3,3,3,3));
vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vY = _mm_mul_ps(vY,M2.r[1]);
vZ = _mm_mul_ps(vZ,M2.r[2]);
@@ -361,10 +361,10 @@ inline XMMATRIX XMMatrixMultiply
vX = _mm_add_ps(vX,vY);
mResult.r[2] = vX;
vW = M1.r[3];
vX = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(3,3,3,3));
vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vY = _mm_mul_ps(vY,M2.r[1]);
vZ = _mm_mul_ps(vZ,M2.r[2]);
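
Each row of the multiply above follows the same splat-multiply-accumulate shape; a condensed sketch of one row (not part of the diff; MulRow is an illustrative name):

inline __m128 MulRow(__m128 row, const __m128 m2[4]) // m2 holds M2.r[0..3]
{
    // Broadcast each component of the row, scale the matching row of M2,
    // and sum the four partial products.
    __m128 vX = XM_PERMUTE_PS(row, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 vY = XM_PERMUTE_PS(row, _MM_SHUFFLE(1, 1, 1, 1));
    __m128 vZ = XM_PERMUTE_PS(row, _MM_SHUFFLE(2, 2, 2, 2));
    __m128 vW = XM_PERMUTE_PS(row, _MM_SHUFFLE(3, 3, 3, 3));
    vX = _mm_mul_ps(vX, m2[0]);
    vY = _mm_mul_ps(vY, m2[1]);
    vZ = _mm_mul_ps(vZ, m2[2]);
    vW = _mm_mul_ps(vW, m2[3]);
    return _mm_add_ps(_mm_add_ps(vX, vY), _mm_add_ps(vZ, vW));
}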
@@ -490,10 +490,10 @@ inline XMMATRIX XMMatrixMultiplyTranspose
// Use vW to hold the original row
XMVECTOR vW = M1.r[0];
// Splat the component X,Y,Z then W
XMVECTOR vX = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vY = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(1,1,1,1));
XMVECTOR vZ = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(3,3,3,3));
XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
// Perform the operation on the first row
vX = _mm_mul_ps(vX,M2.r[0]);
vY = _mm_mul_ps(vY,M2.r[1]);
@@ -506,10 +506,10 @@ inline XMMATRIX XMMatrixMultiplyTranspose
__m128 r0 = vX;
// Repeat for the other 3 rows
vW = M1.r[1];
vX = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(3,3,3,3));
vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vY = _mm_mul_ps(vY,M2.r[1]);
vZ = _mm_mul_ps(vZ,M2.r[2]);
@@ -519,10 +519,10 @@ inline XMMATRIX XMMatrixMultiplyTranspose
vX = _mm_add_ps(vX,vY);
__m128 r1 = vX;
vW = M1.r[2];
vX = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(3,3,3,3));
vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vY = _mm_mul_ps(vY,M2.r[1]);
vZ = _mm_mul_ps(vZ,M2.r[2]);
@@ -532,10 +532,10 @@ inline XMMATRIX XMMatrixMultiplyTranspose
vX = _mm_add_ps(vX,vY);
__m128 r2 = vX;
vW = M1.r[3];
vX = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(0,0,0,0));
vY = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(1,1,1,1));
vZ = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(2,2,2,2));
vW = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(3,3,3,3));
vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
vX = _mm_mul_ps(vX,M2.r[0]);
vY = _mm_mul_ps(vY,M2.r[1]);
vZ = _mm_mul_ps(vZ,M2.r[2]);
@@ -738,10 +738,10 @@ inline XMMATRIX XMMatrixInverse

#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX MT = XMMatrixTranspose(M);
XMVECTOR V00 = _mm_shuffle_ps(MT.r[2], MT.r[2],_MM_SHUFFLE(1,1,0,0));
XMVECTOR V10 = _mm_shuffle_ps(MT.r[3], MT.r[3],_MM_SHUFFLE(3,2,3,2));
XMVECTOR V01 = _mm_shuffle_ps(MT.r[0], MT.r[0],_MM_SHUFFLE(1,1,0,0));
XMVECTOR V11 = _mm_shuffle_ps(MT.r[1], MT.r[1],_MM_SHUFFLE(3,2,3,2));
XMVECTOR V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,1,0,0));
XMVECTOR V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(3,2,3,2));
XMVECTOR V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(1,1,0,0));
XMVECTOR V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(3,2,3,2));
XMVECTOR V02 = _mm_shuffle_ps(MT.r[2], MT.r[0],_MM_SHUFFLE(2,0,2,0));
XMVECTOR V12 = _mm_shuffle_ps(MT.r[3], MT.r[1],_MM_SHUFFLE(3,1,3,1));

@@ -749,10 +749,10 @@ inline XMMATRIX XMMatrixInverse
XMVECTOR D1 = _mm_mul_ps(V01,V11);
XMVECTOR D2 = _mm_mul_ps(V02,V12);

V00 = _mm_shuffle_ps(MT.r[2],MT.r[2],_MM_SHUFFLE(3,2,3,2));
V10 = _mm_shuffle_ps(MT.r[3],MT.r[3],_MM_SHUFFLE(1,1,0,0));
V01 = _mm_shuffle_ps(MT.r[0],MT.r[0],_MM_SHUFFLE(3,2,3,2));
V11 = _mm_shuffle_ps(MT.r[1],MT.r[1],_MM_SHUFFLE(1,1,0,0));
V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(3,2,3,2));
V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(1,1,0,0));
V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(3,2,3,2));
V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(1,1,0,0));
V02 = _mm_shuffle_ps(MT.r[2],MT.r[0],_MM_SHUFFLE(3,1,3,1));
V12 = _mm_shuffle_ps(MT.r[3],MT.r[1],_MM_SHUFFLE(2,0,2,0));

@@ -764,15 +764,15 @@ inline XMMATRIX XMMatrixInverse
D2 = _mm_sub_ps(D2,V02);
// V11 = D0Y,D0W,D2Y,D2Y
V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,1,3,1));
V00 = _mm_shuffle_ps(MT.r[1], MT.r[1],_MM_SHUFFLE(1,0,2,1));
V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1,0,2,1));
V10 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(0,3,0,2));
V01 = _mm_shuffle_ps(MT.r[0], MT.r[0],_MM_SHUFFLE(0,1,0,2));
V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(0,1,0,2));
V11 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(2,1,2,1));
// V13 = D1Y,D1W,D2W,D2W
XMVECTOR V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,3,3,1));
V02 = _mm_shuffle_ps(MT.r[3], MT.r[3],_MM_SHUFFLE(1,0,2,1));
V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1,0,2,1));
V12 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(0,3,0,2));
XMVECTOR V03 = _mm_shuffle_ps(MT.r[2], MT.r[2],_MM_SHUFFLE(0,1,0,2));
XMVECTOR V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(0,1,0,2));
V13 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(2,1,2,1));

XMVECTOR C0 = _mm_mul_ps(V00,V10);
@@ -782,15 +782,15 @@ inline XMMATRIX XMMatrixInverse

// V11 = D0X,D0Y,D2X,D2X
V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(0,0,1,0));
V00 = _mm_shuffle_ps(MT.r[1], MT.r[1],_MM_SHUFFLE(2,1,3,2));
V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(2,1,3,2));
V10 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(2,1,0,3));
V01 = _mm_shuffle_ps(MT.r[0], MT.r[0],_MM_SHUFFLE(1,3,2,3));
V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1,3,2,3));
V11 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(0,2,1,2));
// V13 = D1X,D1Y,D2Z,D2Z
V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(2,2,1,0));
V02 = _mm_shuffle_ps(MT.r[3], MT.r[3],_MM_SHUFFLE(2,1,3,2));
V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(2,1,3,2));
V12 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(2,1,0,3));
V03 = _mm_shuffle_ps(MT.r[2], MT.r[2],_MM_SHUFFLE(1,3,2,3));
V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,3,2,3));
V13 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(0,2,1,2));

V00 = _mm_mul_ps(V00,V10);
@@ -802,22 +802,22 @@ inline XMMATRIX XMMatrixInverse
C4 = _mm_sub_ps(C4,V02);
C6 = _mm_sub_ps(C6,V03);

V00 = _mm_shuffle_ps(MT.r[1],MT.r[1],_MM_SHUFFLE(0,3,0,3));
V00 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(0,3,0,3));
// V10 = D0Z,D0Z,D2X,D2Y
V10 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,2,2));
V10 = _mm_shuffle_ps(V10,V10,_MM_SHUFFLE(0,2,3,0));
V01 = _mm_shuffle_ps(MT.r[0],MT.r[0],_MM_SHUFFLE(2,0,3,1));
V10 = XM_PERMUTE_PS(V10,_MM_SHUFFLE(0,2,3,0));
V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(2,0,3,1));
// V11 = D0X,D0W,D2X,D2Y
V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,3,0));
V11 = _mm_shuffle_ps(V11,V11,_MM_SHUFFLE(2,1,0,3));
V02 = _mm_shuffle_ps(MT.r[3],MT.r[3],_MM_SHUFFLE(0,3,0,3));
V11 = XM_PERMUTE_PS(V11,_MM_SHUFFLE(2,1,0,3));
V02 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(0,3,0,3));
// V12 = D1Z,D1Z,D2Z,D2W
V12 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,2,2));
V12 = _mm_shuffle_ps(V12,V12,_MM_SHUFFLE(0,2,3,0));
V03 = _mm_shuffle_ps(MT.r[2],MT.r[2],_MM_SHUFFLE(2,0,3,1));
V12 = XM_PERMUTE_PS(V12,_MM_SHUFFLE(0,2,3,0));
V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(2,0,3,1));
// V13 = D1X,D1W,D2Z,D2W
V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,3,0));
V13 = _mm_shuffle_ps(V13,V13,_MM_SHUFFLE(2,1,0,3));
V13 = XM_PERMUTE_PS(V13,_MM_SHUFFLE(2,1,0,3));

V00 = _mm_mul_ps(V00,V10);
V01 = _mm_mul_ps(V01,V11);
@@ -836,10 +836,10 @@ inline XMMATRIX XMMatrixInverse
C2 = _mm_shuffle_ps(C2,C3,_MM_SHUFFLE(3,1,2,0));
C4 = _mm_shuffle_ps(C4,C5,_MM_SHUFFLE(3,1,2,0));
C6 = _mm_shuffle_ps(C6,C7,_MM_SHUFFLE(3,1,2,0));
C0 = _mm_shuffle_ps(C0,C0,_MM_SHUFFLE(3,1,2,0));
C2 = _mm_shuffle_ps(C2,C2,_MM_SHUFFLE(3,1,2,0));
C4 = _mm_shuffle_ps(C4,C4,_MM_SHUFFLE(3,1,2,0));
C6 = _mm_shuffle_ps(C6,C6,_MM_SHUFFLE(3,1,2,0));
C0 = XM_PERMUTE_PS(C0,_MM_SHUFFLE(3,1,2,0));
C2 = XM_PERMUTE_PS(C2,_MM_SHUFFLE(3,1,2,0));
C4 = XM_PERMUTE_PS(C4,_MM_SHUFFLE(3,1,2,0));
C6 = XM_PERMUTE_PS(C6,_MM_SHUFFLE(3,1,2,0));
// Get the determinant
XMVECTOR vTemp = XMVector4Dot(C0,MT.r[0]);
if (pDeterminant != nullptr)
@@ -1357,7 +1357,7 @@ inline XMMATRIX XMMatrixRotationX
M.r[0] = g_XMIdentityR0;
M.r[1] = vCos;
// x = 0,y = sin,z = cos, w = 0
vCos = _mm_shuffle_ps(vCos,vCos,_MM_SHUFFLE(3,1,2,0));
vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,1,2,0));
// x = 0,y = -sin,z = cos, w = 0
vCos = _mm_mul_ps(vCos,g_XMNegateY);
M.r[2] = vCos;
@@ -1434,7 +1434,7 @@ inline XMMATRIX XMMatrixRotationY
M.r[2] = vSin;
M.r[1] = g_XMIdentityR1;
// x = cos,y = 0,z = sin, w = 0
vSin = _mm_shuffle_ps(vSin,vSin,_MM_SHUFFLE(3,0,1,2));
vSin = XM_PERMUTE_PS(vSin,_MM_SHUFFLE(3,0,1,2));
// x = cos,y = 0,z = -sin, w = 0
vSin = _mm_mul_ps(vSin,g_XMNegateZ);
M.r[0] = vSin;
@@ -1510,7 +1510,7 @@ inline XMMATRIX XMMatrixRotationZ
XMMATRIX M;
M.r[0] = vCos;
// x = sin,y = cos,z = 0, w = 0
vCos = _mm_shuffle_ps(vCos,vCos,_MM_SHUFFLE(3,2,0,1));
vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,2,0,1));
// x = cos,y = -sin,z = 0, w = 0
vCos = _mm_mul_ps(vCos,g_XMNegateX);
M.r[1] = vCos;
@@ -1597,8 +1597,8 @@ inline XMMATRIX XMMatrixRotationNormal
XMVECTOR C1 = _mm_set_ps1(fCosAngle);
XMVECTOR C0 = _mm_set_ps1(fSinAngle);

XMVECTOR N0 = _mm_shuffle_ps(NormalAxis,NormalAxis,_MM_SHUFFLE(3,0,2,1));
XMVECTOR N1 = _mm_shuffle_ps(NormalAxis,NormalAxis,_MM_SHUFFLE(3,1,0,2));
XMVECTOR N0 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,0,2,1));
XMVECTOR N1 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,1,0,2));

XMVECTOR V0 = _mm_mul_ps(C2, N0);
V0 = _mm_mul_ps(V0, N1);
@@ -1614,18 +1614,18 @@ inline XMMATRIX XMMatrixRotationNormal

V0 = _mm_and_ps(R0,g_XMMask3);
XMVECTOR V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,1,2,0));
V1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(0,3,2,1));
V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(0,3,2,1));
XMVECTOR V2 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(0,0,1,1));
V2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,0,2,0));
V2 = XM_PERMUTE_PS(V2,_MM_SHUFFLE(2,0,2,0));

R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(1,0,3,0));
R2 = _mm_shuffle_ps(R2,R2,_MM_SHUFFLE(1,3,2,0));
R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,2,0));

XMMATRIX M;
M.r[0] = R2;

R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(3,2,3,1));
R2 = _mm_shuffle_ps(R2,R2,_MM_SHUFFLE(1,3,0,2));
R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,0,2));
M.r[1] = R2;

V2 = _mm_shuffle_ps(V2,V0,_MM_SHUFFLE(3,2,1,0));
@@ -1702,37 +1702,37 @@ inline XMMATRIX XMMatrixRotationQuaternion
XMVECTOR Q0 = _mm_add_ps(Quaternion,Quaternion);
XMVECTOR Q1 = _mm_mul_ps(Quaternion,Q0);

XMVECTOR V0 = _mm_shuffle_ps(Q1,Q1,_MM_SHUFFLE(3,0,0,1));
XMVECTOR V0 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,0,0,1));
V0 = _mm_and_ps(V0,g_XMMask3);
XMVECTOR V1 = _mm_shuffle_ps(Q1,Q1,_MM_SHUFFLE(3,1,2,2));
XMVECTOR V1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,1,2,2));
V1 = _mm_and_ps(V1,g_XMMask3);
XMVECTOR R0 = _mm_sub_ps(Constant1110,V0);
R0 = _mm_sub_ps(R0, V1);

V0 = _mm_shuffle_ps(Quaternion,Quaternion,_MM_SHUFFLE(3,1,0,0));
V1 = _mm_shuffle_ps(Q0,Q0,_MM_SHUFFLE(3,2,1,2));
V0 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,1,0,0));
V1 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,2,1,2));
V0 = _mm_mul_ps(V0, V1);

V1 = _mm_shuffle_ps(Quaternion,Quaternion,_MM_SHUFFLE(3,3,3,3));
XMVECTOR V2 = _mm_shuffle_ps(Q0,Q0,_MM_SHUFFLE(3,0,2,1));
V1 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,3,3,3));
XMVECTOR V2 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,0,2,1));
V1 = _mm_mul_ps(V1, V2);

XMVECTOR R1 = _mm_add_ps(V0, V1);
XMVECTOR R2 = _mm_sub_ps(V0, V1);

V0 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(1,0,2,1));
V0 = _mm_shuffle_ps(V0,V0,_MM_SHUFFLE(1,3,2,0));
V0 = XM_PERMUTE_PS(V0,_MM_SHUFFLE(1,3,2,0));
V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,2,0,0));
V1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(2,0,2,0));
V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(2,0,2,0));

Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(1,0,3,0));
Q1 = _mm_shuffle_ps(Q1,Q1,_MM_SHUFFLE(1,3,2,0));
Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,2,0));

XMMATRIX M;
M.r[0] = Q1;

Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(3,2,3,1));
Q1 = _mm_shuffle_ps(Q1,Q1,_MM_SHUFFLE(1,3,0,2));
Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,0,2));
M.r[1] = Q1;

Q1 = _mm_shuffle_ps(V1,R0,_MM_SHUFFLE(3,2,1,0));
@@ -2487,10 +2487,10 @@ inline XMMATRIX XMMatrixPerspectiveOffCenterLH
vTemp = _mm_and_ps(vTemp,g_XMMaskY);
M.r[1] = vTemp;
// 0,0,fRange,1.0f
M.m[2][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
M.m[2][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
M.m[2][2] = fRange;
M.m[2][3] = 1.0f;
M.r[2] = XMVectorSet( -(ViewLeft + ViewRight) * ReciprocalWidth,
-(ViewTop + ViewBottom) * ReciprocalHeight,
fRange,
1.0f );
// 0,0,-fRange * NearZ,0.0f
vValues = _mm_and_ps(vValues,g_XMMaskZ);
M.r[3] = vValues;
@@ -2585,10 +2585,10 @@ inline XMMATRIX XMMatrixPerspectiveOffCenterRH
vTemp = _mm_and_ps(vTemp,g_XMMaskY);
M.r[1] = vTemp;
// 0,0,fRange,1.0f
M.m[2][0] = (ViewLeft + ViewRight) * ReciprocalWidth;
M.m[2][1] = (ViewTop + ViewBottom) * ReciprocalHeight;
M.m[2][2] = fRange;
M.m[2][3] = -1.0f;
M.r[2] = XMVectorSet( (ViewLeft + ViewRight) * ReciprocalWidth,
(ViewTop + ViewBottom) * ReciprocalHeight,
fRange,
-1.0f );
// 0,0,-fRange * NearZ,0.0f
vValues = _mm_and_ps(vValues,g_XMMaskZ);
M.r[3] = vValues;
|
||||
|
@ -156,23 +156,23 @@ inline XMVECTOR XMQuaternionMultiply
|
||||
XMVECTOR Q2Z = Q2;
|
||||
XMVECTOR vResult = Q2;
|
||||
// Splat with one instruction
|
||||
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
|
||||
Q2X = _mm_shuffle_ps(Q2X,Q2X,_MM_SHUFFLE(0,0,0,0));
|
||||
Q2Y = _mm_shuffle_ps(Q2Y,Q2Y,_MM_SHUFFLE(1,1,1,1));
|
||||
Q2Z = _mm_shuffle_ps(Q2Z,Q2Z,_MM_SHUFFLE(2,2,2,2));
|
||||
vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,3,3,3));
|
||||
Q2X = XM_PERMUTE_PS(Q2X,_MM_SHUFFLE(0,0,0,0));
|
||||
Q2Y = XM_PERMUTE_PS(Q2Y,_MM_SHUFFLE(1,1,1,1));
|
||||
Q2Z = XM_PERMUTE_PS(Q2Z,_MM_SHUFFLE(2,2,2,2));
|
||||
// Retire Q1 and perform Q1*Q2W
|
||||
vResult = _mm_mul_ps(vResult,Q1);
|
||||
XMVECTOR Q1Shuffle = Q1;
|
||||
// Shuffle the copies of Q1
|
||||
Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
|
||||
Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
|
||||
// Mul by Q1WZYX
|
||||
Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
|
||||
Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
|
||||
Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
|
||||
// Flip the signs on y and z
|
||||
Q2X = _mm_mul_ps(Q2X,ControlWZYX);
|
||||
// Mul by Q1ZWXY
|
||||
Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
|
||||
Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
|
||||
Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
|
||||
// Flip the signs on z and w
|
||||
Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);
|
||||
// Mul by Q1YXWZ
|
||||
@ -438,7 +438,7 @@ inline XMVECTOR XMQuaternionSlerpV
|
||||
|
||||
XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);
|
||||
|
||||
XMVECTOR V01 = _mm_shuffle_ps(T,T,_MM_SHUFFLE(2,3,0,1));
|
||||
XMVECTOR V01 = XM_PERMUTE_PS(T,_MM_SHUFFLE(2,3,0,1));
|
||||
V01 = _mm_and_ps(V01,MaskXY);
|
||||
V01 = _mm_xor_ps(V01,SignMask2);
|
||||
V01 = _mm_add_ps(g_XMIdentityR0, V01);
|
||||
@ -897,11 +897,11 @@ inline XMVECTOR XMQuaternionRotationMatrix
|
||||
XMVECTOR r2 = M.r[2]; // (r20, r21, r22, 0)
|
||||
|
||||
// (r00, r00, r00, r00)
|
||||
XMVECTOR r00 = _mm_shuffle_ps(r0, r0, _MM_SHUFFLE(0,0,0,0));
|
||||
XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0,0,0,0));
|
||||
// (r11, r11, r11, r11)
|
||||
XMVECTOR r11 = _mm_shuffle_ps(r1, r1, _MM_SHUFFLE(1,1,1,1));
|
||||
XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1,1,1,1));
|
||||
// (r22, r22, r22, r22)
|
||||
XMVECTOR r22 = _mm_shuffle_ps(r2, r2, _MM_SHUFFLE(2,2,2,2));
|
||||
XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2,2,2,2));
|
||||
|
||||
// x^2 >= y^2 equivalent to r11 - r00 <= 0
|
||||
// (r11 - r00, r11 - r00, r11 - r00, r11 - r00)
|
||||
@ -935,7 +935,7 @@ inline XMVECTOR XMQuaternionRotationMatrix
|
||||
// (r10, r10, r20, r21)
|
||||
t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1,0,0,0));
|
||||
// (r10, r20, r21, r10)
|
||||
t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1,3,2,0));
|
||||
t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0));
|
||||
// (4*x*y, 4*x*z, 4*y*z, unused)
|
||||
XMVECTOR xyxzyz = _mm_add_ps(t0, t1);
|
||||
|
||||
@ -944,7 +944,7 @@ inline XMVECTOR XMQuaternionRotationMatrix
|
||||
// (r12, r12, r02, r01)
|
||||
t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1,2,2,2));
|
||||
// (r12, r02, r01, r12)
|
||||
t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(1,3,2,0));
|
||||
t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0));
|
||||
// (4*x*w, 4*y*w, 4*z*w, unused)
|
||||
XMVECTOR xwywzw = _mm_sub_ps(t0, t1);
|
||||
xwywzw = _mm_mul_ps(XMMPMP, xwywzw);
|
||||
@ -1137,15 +1137,15 @@ inline XMVECTOR XMPlaneNormalizeEst
|
||||
// Perform the dot product
|
||||
XMVECTOR vDot = _mm_mul_ps(P,P);
|
||||
// x=Dot.y, y=Dot.z
|
||||
XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
|
||||
XMVECTOR vTemp = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(2,1,2,1));
|
||||
// Result.x = x+y
|
||||
vDot = _mm_add_ss(vDot,vTemp);
|
||||
// x=Dot.z
|
||||
vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
|
||||
vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
|
||||
// Result.x = (x+y)+z
|
||||
vDot = _mm_add_ss(vDot,vTemp);
|
||||
// Splat x
|
||||
vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
|
||||
vDot = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(0,0,0,0));
|
||||
// Get the reciprocal
|
||||
vDot = _mm_rsqrt_ps(vDot);
|
||||
// Get the reciprocal
|
||||
@ -1183,11 +1183,11 @@ inline XMVECTOR XMPlaneNormalize
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product on x,y and z only
XMVECTOR vLengthSq = _mm_mul_ps(P,P);
XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,1,2,1));
vLengthSq = _mm_add_ss(vLengthSq,vTemp);
vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
vLengthSq = _mm_add_ss(vLengthSq,vTemp);
vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Failsafe on zero (Or epsilon) length planes
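The permute/add_ss pairs above are the pre-SSE3 idiom for a three-component horizontal dot product: lane 0 accumulates y*y and then z*z, and the final permute broadcasts the sum to all lanes. The same idiom as a self-contained helper:

#include <xmmintrin.h>

// Returns (d,d,d,d) with d = x*x + y*y + z*z, using only SSE1 operations.
inline __m128 Dot3Splat(__m128 v)
{
    __m128 sq = _mm_mul_ps(v, v);                                // (x2, y2, z2, w2)
    __m128 t  = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(2,1,2,1));    // (y2, z2, y2, z2)
    sq = _mm_add_ss(sq, t);                                      // lane 0 = x2+y2
    t  = _mm_shuffle_ps(t, t, _MM_SHUFFLE(1,1,1,1));             // (z2, z2, z2, z2)
    sq = _mm_add_ss(sq, t);                                      // lane 0 = x2+y2+z2
    return _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0,0,0,0));         // splat lane 0
}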
@ -1531,24 +1531,11 @@ inline XMVECTOR XMColorAdjustSaturation
return vbslq_f32( g_XMSelect1110, vResult, vColor );
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
// Mul RGB by intensity constants
XMVECTOR vLuminance = _mm_mul_ps(vColor,gvLuminance);
// vResult.x = vLuminance.y, vResult.y = vLuminance.y,
// vResult.z = vLuminance.z, vResult.w = vLuminance.z
XMVECTOR vResult = vLuminance;
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,1,1));
// vLuminance.x += vLuminance.y
vLuminance = _mm_add_ss(vLuminance,vResult);
// Splat vLuminance.z
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,2,2));
// vLuminance.x += vLuminance.z (Dot product)
vLuminance = _mm_add_ss(vLuminance,vResult);
// Splat vLuminance
vLuminance = _mm_shuffle_ps(vLuminance,vLuminance,_MM_SHUFFLE(0,0,0,0));
XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance );
// Splat fSaturation
XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
// vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
vResult = _mm_sub_ps(vColor,vLuminance);
XMVECTOR vResult = _mm_sub_ps(vColor,vLuminance);
vResult = _mm_mul_ps(vResult,vSaturation);
vResult = _mm_add_ps(vResult,vLuminance);
// Retain w from the source color
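This rewrite collapses the hand-scheduled dot product into one XMVector3Dot call; the underlying math is a lerp between the grayscale luminance and the original color. A scalar sketch of the same computation, using the weights from the gvLuminance constant above:

// Scalar sketch: saturation adjust as (color - lum) * saturation + lum.
inline void AdjustSaturationScalar(float rgba[4], float saturation)
{
    const float lum = 0.2125f*rgba[0] + 0.7154f*rgba[1] + 0.0721f*rgba[2];
    for (int i = 0; i < 3; ++i)      // the w/alpha channel is retained as-is
        rgba[i] = (rgba[i] - lum) * saturation + lum;
}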
File diff suppressed because it is too large
@ -250,7 +250,7 @@ inline XMVECTOR PackedVector::XMLoadShortN2
// x needs to be sign extended
vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// x - 0x8000 to undo the signed order.
vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16);
// Convert -1.0f - 1.0f
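From here on, every reinterpret_cast round trip through the address of an __m128 is replaced by one of the SSE2 cast intrinsics. Both forms are bit-for-bit reinterpretations that generate no instructions, but the cast intrinsics keep the value in a register instead of implying a memory reference, avoid pointer-aliasing trouble, and are portable across compilers. The two styles side by side, as a sketch:

#include <emmintrin.h>

// Old style: reinterpret through a pointer, which treats the vector as memory.
inline __m128i BitcastOld(__m128 v) { return reinterpret_cast<const __m128i *>(&v)[0]; }
// New style: a no-op cast intrinsic; the value never has to leave the register.
inline __m128i BitcastNew(__m128 v) { return _mm_castps_si128(v); }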
@ -286,7 +286,7 @@ inline XMVECTOR PackedVector::XMLoadShort2
// x needs to be sign extended
vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// x - 0x8000 to undo the signed order.
vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16);
// Y is 65536 too large

@ -322,7 +322,7 @@ inline XMVECTOR PackedVector::XMLoadUShortN2
// y needs to be sign flipped
vTemp = _mm_xor_ps(vTemp,g_XMFlipY);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// y + 0x8000 to undo the signed order.
vTemp = _mm_add_ps(vTemp,FixaddY16);
// Y is 65536 times too large

@ -358,7 +358,7 @@ inline XMVECTOR PackedVector::XMLoadUShort2
// y needs to be sign flipped
vTemp = _mm_xor_ps(vTemp,g_XMFlipY);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// Y is 65536 times too large
vTemp = _mm_mul_ps(vTemp,g_XMFixupY16);
// y + 0x8000 to undo the signed order.

@ -452,7 +452,7 @@ inline XMVECTOR PackedVector::XMLoadU565
// Mask off x, y and z
vResult = _mm_and_ps(vResult,U565And);
// Convert to float
vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
// Normalize x, y, and z
vResult = _mm_mul_ps(vResult,U565Mul);
return vResult;
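XMLoadU565 unpacks a 16-bit 5:6:5 texel whose three fields sit in separate 32-bit lanes by this point; the And mask isolates each field in place and the multiply scales it down. For orientation, a scalar sketch of the same unpack, assuming the usual XMU565 bitfield order (x in the low 5 bits, then y, then z):

#include <stdint.h>

// Scalar sketch: extract the three fields of a 5:6:5 value as floats.
inline void UnpackU565(uint16_t v, float out[3])
{
    out[0] = (float)( v        & 0x1F);   // x: 5 bits
    out[1] = (float)((v >>  5) & 0x3F);   // y: 6 bits
    out[2] = (float)((v >> 11) & 0x1F);   // z: 5 bits
}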
@ -741,17 +741,17 @@ inline XMVECTOR PackedVector::XMLoadShortN4
// Splat the color in all four entries (x,z,y,w)
__m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
// Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000
__m128 vTemp = _mm_and_ps(reinterpret_cast<const __m128 *>(&vIntd)[0],g_XMMaskX16Y16Z16W16);
__m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16);
// x and z are unsigned! Flip the bits to convert the order to signed
vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16Z16W16);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// x and z - 0x8000 to complete the conversion
vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16Z16W16);
// Convert to -1.0f - 1.0f
vTemp = _mm_mul_ps(vTemp,g_XMNormalizeX16Y16Z16W16);
// Very important! The entries are x,z,y,w, flip it to x,y,z,w
vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(3,1,2,0));
vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
// Clamp result (for case of -32768)
return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
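The XMLoadShortN4 path reads all four 16-bit components with a single _mm_load1_pd, which duplicates the 64-bit payload into both halves of the register. The mask then keeps x and z from the low halfwords of lanes 0-1 and y and w from the high halfwords of lanes 2-3, which is why the lanes come out in x,z,y,w order and need the final permute. A sketch of just the splat-and-mask step (the mask value is inferred from the comments above; the sign-fixup constants are omitted):

#include <emmintrin.h>
#include <stdint.h>

struct ShortN4 { int16_t x, y, z, w; };

inline __m128 LoadShortsAsXZYW(const ShortN4 *pSource)
{
    // 32-bit lanes become (x|y<<16, z|w<<16, x|y<<16, z|w<<16).
    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
    // Keep the low halfword in lanes 0-1 and the high halfword in lanes 2-3,
    // leaving (x, z, y<<16, w<<16): hence the x,z,y,w ordering.
    const __m128i mask = _mm_setr_epi32(0x0000FFFF, 0x0000FFFF, (int)0xFFFF0000, (int)0xFFFF0000);
    return _mm_and_ps(_mm_castpd_ps(vIntd), _mm_castsi128_ps(mask));
}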
@ -782,17 +782,17 @@ inline XMVECTOR PackedVector::XMLoadShort4
// Splat the color in all four entries (x,z,y,w)
__m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
// Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000
__m128 vTemp = _mm_and_ps(reinterpret_cast<const __m128 *>(&vIntd)[0],g_XMMaskX16Y16Z16W16);
__m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16);
// x and z are unsigned! Flip the bits to convert the order to signed
vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16Z16W16);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// x and z - 0x8000 to complete the conversion
vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16Z16W16);
// Fix y and w because they are 65536 too large
vTemp = _mm_mul_ps(vTemp,g_XMFixupY16W16);
// Very important! The entries are x,z,y,w, flip it to x,y,z,w
return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(3,1,2,0));
return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

@ -825,17 +825,17 @@ inline XMVECTOR PackedVector::XMLoadUShortN4
// Splat the color in all four entries (x,z,y,w)
__m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
// Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000
__m128 vTemp = _mm_and_ps(reinterpret_cast<const __m128 *>(&vIntd)[0],g_XMMaskX16Y16Z16W16);
__m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16);
// y and w are signed! Flip the bits to convert the order to unsigned
vTemp = _mm_xor_ps(vTemp,g_XMFlipZW);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// y and w + 0x8000 to complete the conversion
vTemp = _mm_add_ps(vTemp,FixaddY16W16);
// Fix y and w because they are 65536 too large
vTemp = _mm_mul_ps(vTemp,FixupY16W16);
// Very important! The entries are x,z,y,w, flip it to x,y,z,w
return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(3,1,2,0));
return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

@ -865,17 +865,17 @@ inline XMVECTOR PackedVector::XMLoadUShort4
// Splat the color in all four entries (x,z,y,w)
__m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
// Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000
__m128 vTemp = _mm_and_ps(reinterpret_cast<const __m128 *>(&vIntd)[0],g_XMMaskX16Y16Z16W16);
__m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16);
// y and w are signed! Flip the bits to convert the order to unsigned
vTemp = _mm_xor_ps(vTemp,g_XMFlipZW);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// Fix y and w because they are 65536 too large
vTemp = _mm_mul_ps(vTemp,g_XMFixupY16W16);
// y and w + 0x8000 to complete the conversion
vTemp = _mm_add_ps(vTemp,FixaddY16W16);
// Very important! The entries are x,z,y,w, flip it to x,y,z,w
return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(3,1,2,0));
return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

@ -910,7 +910,7 @@ inline XMVECTOR PackedVector::XMLoadXDecN4
// a is unsigned! Flip the bit to convert the order to signed
vTemp = _mm_xor_ps(vTemp,g_XMFlipA2B10G10R10);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// RGB + 0, A + 0x80000000.f to undo the signed order.
vTemp = _mm_add_ps(vTemp,g_XMFixAA2B10G10R10);
// Convert 0-255 to 0.0f-1.0f

@ -953,7 +953,7 @@ inline XMVECTOR PackedVector::XMLoadXDec4
// a is unsigned! Flip the bit to convert the order to signed
vTemp = _mm_xor_ps(vTemp,XDec4Xor);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// RGB + 0, A + 0x80000000.f to undo the signed order.
vTemp = _mm_add_ps(vTemp,XDec4Add);
// Convert 0-255 to 0.0f-1.0f

@ -993,7 +993,7 @@ inline XMVECTOR PackedVector::XMLoadUDecN4
// a is unsigned! Flip the bit to convert the order to signed
vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// RGB + 0, A + 0x80000000.f to undo the signed order.
vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
// Convert 0-255 to 0.0f-1.0f

@ -1031,7 +1031,7 @@ inline XMVECTOR PackedVector::XMLoadUDec4
// a is unsigned! Flip the bit to convert the order to signed
vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// RGB + 0, A + 0x80000000.f to undo the signed order.
vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
// Convert 0-255 to 0.0f-1.0f

@ -1074,7 +1074,7 @@ inline XMVECTOR PackedVector::XMLoadDecN4
// a is unsigned! Flip the bit to convert the order to signed
vTemp = _mm_xor_ps(vTemp,g_XMXorDec4);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// RGB + 0, A + 0x80000000.f to undo the signed order.
vTemp = _mm_add_ps(vTemp,g_XMAddDec4);
// Convert 0-255 to 0.0f-1.0f

@ -1117,7 +1117,7 @@ inline XMVECTOR PackedVector::XMLoadDec4
// a is unsigned! Flip the bit to convert the order to signed
vTemp = _mm_xor_ps(vTemp,g_XMXorDec4);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// RGB + 0, A + 0x80000000.f to undo the signed order.
vTemp = _mm_add_ps(vTemp,g_XMAddDec4);
// Convert 0-255 to 0.0f-1.0f

@ -1152,7 +1152,7 @@ inline XMVECTOR PackedVector::XMLoadUByteN4
// w is signed! Flip the bits to convert the order to unsigned
vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// w + 0x80 to complete the conversion
vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
// Fix y, z and w because they are too large

@ -1187,7 +1187,7 @@ inline XMVECTOR PackedVector::XMLoadUByte4
// w is signed! Flip the bits to convert the order to unsigned
vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// w + 0x80 to complete the conversion
vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
// Fix y, z and w because they are too large

@ -1222,7 +1222,7 @@ inline XMVECTOR PackedVector::XMLoadByteN4
// x,y and z are unsigned! Flip the bits to convert the order to signed
vTemp = _mm_xor_ps(vTemp,g_XMXorByte4);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// x, y and z - 0x80 to complete the conversion
vTemp = _mm_add_ps(vTemp,g_XMAddByte4);
// Fix y, z and w because they are too large

@ -1258,7 +1258,7 @@ inline XMVECTOR PackedVector::XMLoadByte4
// x,y and z are unsigned! Flip the bits to convert the order to signed
vTemp = _mm_xor_ps(vTemp,g_XMXorByte4);
// Convert to floating point numbers
vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
// x, y and z - 0x80 to complete the conversion
vTemp = _mm_add_ps(vTemp,g_XMAddByte4);
// Fix y, z and w because they are too large

@ -1284,7 +1284,7 @@ inline XMVECTOR PackedVector::XMLoadUNibble4
// Mask off x, y and z
vResult = _mm_and_ps(vResult,UNibble4And);
// Convert to float
vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
// Normalize x, y, and z
vResult = _mm_mul_ps(vResult,UNibble4Mul);
return vResult;

@ -1315,7 +1315,7 @@ inline XMVECTOR PackedVector::XMLoadU555
// Mask off x, y and z
vResult = _mm_and_ps(vResult,U555And);
// Convert to float
vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
// Normalize x, y, and z
vResult = _mm_mul_ps(vResult,U555Mul);
return vResult;

@ -1369,7 +1369,7 @@ inline void PackedVector::XMStoreColor
// Convert to 0-255
vResult = _mm_mul_ps(vResult,Scale);
// Shuffle RGBA to ARGB
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,0,1,2));
// Convert to int
__m128i vInt = _mm_cvtps_epi32(vResult);
// Mash to shorts

@ -1431,7 +1431,7 @@ inline void PackedVector::XMStoreShortN2
vResult = _mm_mul_ps(vResult,Scale);
__m128i vResulti = _mm_cvtps_epi32(vResult);
vResulti = _mm_packs_epi32(vResulti,vResulti);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->x),reinterpret_cast<const __m128 *>(&vResulti)[0]);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->x),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
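The store side gets the same cast treatment: the floats are converted and saturate-packed to 16-bit lanes, then the low 32 or 64 bits are written with _mm_store_ss or _mm_store_sd through _mm_castsi128_ps or _mm_castsi128_pd. A minimal sketch of the two-short store, assuming the input is already scaled into signed 16-bit range:

#include <emmintrin.h>
#include <stdint.h>

struct Short2 { int16_t x, y; };

inline void StoreShort2Sketch(Short2 *pDestination, __m128 v)
{
    __m128i vInt = _mm_cvtps_epi32(v);       // round each lane to int32
    vInt = _mm_packs_epi32(vInt, vInt);      // saturate-pack int32 -> int16
    // Write the low 32 bits (two shorts) without touching the rest.
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->x), _mm_castsi128_ps(vInt));
}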
@ -1469,7 +1469,7 @@ inline void PackedVector::XMStoreShort2
__m128i vInt = _mm_cvtps_epi32(vResult);
// Pack the ints into shorts
vInt = _mm_packs_epi32(vInt,vInt);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->x),reinterpret_cast<const __m128 *>(&vInt)[0]);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->x),_mm_castsi128_ps(vInt));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

@ -1942,7 +1942,7 @@ inline void PackedVector::XMStoreShortN4
vResult = _mm_mul_ps(vResult,Scale);
__m128i vResulti = _mm_cvtps_epi32(vResult);
vResulti = _mm_packs_epi32(vResulti,vResulti);
_mm_store_sd(reinterpret_cast<double *>(&pDestination->x),reinterpret_cast<const __m128d *>(&vResulti)[0]);
_mm_store_sd(reinterpret_cast<double *>(&pDestination->x),_mm_castsi128_pd(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

@ -1991,7 +1991,7 @@ inline void PackedVector::XMStoreShort4
__m128i vInt = _mm_cvtps_epi32(vResult);
// Pack the ints into shorts
vInt = _mm_packs_epi32(vInt,vInt);
_mm_store_sd(reinterpret_cast<double *>(&pDestination->x),reinterpret_cast<const __m128d *>(&vInt)[0]);
_mm_store_sd(reinterpret_cast<double *>(&pDestination->x),_mm_castsi128_pd(vInt));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

@ -2138,13 +2138,13 @@ inline void PackedVector::XMStoreXDecN4
__m128i vResultw = _mm_and_si128(vResulti,g_XMMaskW);
vResulti = _mm_add_epi32(vResulti,vResultw);
// Do a horizontal or of all 4 entries
vResult = _mm_shuffle_ps(reinterpret_cast<const __m128 *>(&vResulti)[0],reinterpret_cast<const __m128 *>(&vResulti)[0],_MM_SHUFFLE(0,3,2,1));
vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
vResult = XM_PERMUTE_PS(_mm_castsi128_ps(vResulti),_MM_SHUFFLE(0,3,2,1));
vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult));
vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,3,2,1));
vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult));
vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,3,2,1));
vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult));
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
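The XMStoreXDecN4 hunk is the densest of these: a horizontal OR of all four 32-bit lanes, done by rotating the vector one lane at a time and OR-ing into the accumulator, now expressed with XM_PERMUTE_PS plus cast intrinsics. The same reduction can be written with integer shuffles in two steps instead of three, as a sketch:

#include <emmintrin.h>

// ORs all four 32-bit lanes together; every output lane holds x|y|z|w.
inline __m128i HorizontalOr(__m128i v)
{
    v = _mm_or_si128(v, _mm_shuffle_epi32(v, _MM_SHUFFLE(0,3,2,1))); // rotate by one lane
    v = _mm_or_si128(v, _mm_shuffle_epi32(v, _MM_SHUFFLE(1,0,3,2))); // rotate by two lanes
    return v;
}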
@ -2197,7 +2197,7 @@ inline void PackedVector::XMStoreXDec4
vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

@ -2248,7 +2248,7 @@ inline void PackedVector::XMStoreUDecN4
vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

@ -2299,7 +2299,7 @@ inline void PackedVector::XMStoreUDec4
vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

@ -2348,7 +2348,7 @@ inline void PackedVector::XMStoreDecN4
vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

@ -2399,7 +2399,7 @@ inline void PackedVector::XMStoreDec4
vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

@ -2451,7 +2451,7 @@ inline void PackedVector::XMStoreUByteN4
vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

@ -2503,7 +2503,7 @@ inline void PackedVector::XMStoreUByte4
vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

@ -2553,7 +2553,7 @@ inline void PackedVector::XMStoreByteN4
vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

@ -2605,7 +2605,7 @@ inline void PackedVector::XMStoreByte4
vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
// i = x|y|z|w
vResulti = _mm_or_si128(vResulti,vResulti2);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
_mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}