1
0
mirror of https://github.com/microsoft/DirectXMath synced 2024-11-09 14:10:09 +00:00

XMFLOAT3X4 data type and load/store functions (#71)

This commit is contained in:
Chuck Walbourn 2018-06-01 10:47:08 -07:00 committed by GitHub
parent 9226cd4d0c
commit 0fad2114f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 374 additions and 2 deletions

View File

@ -748,7 +748,7 @@ struct XMFLOAT3X3
};
//------------------------------------------------------------------------------
// 4x3 Matrix: 32 bit floating point components
// 4x3 Row-major Matrix: 32 bit floating point components
struct XMFLOAT4X3
{
union
@ -761,6 +761,7 @@ struct XMFLOAT4X3
float _41, _42, _43;
};
float m[4][3];
float f[12];
};
XMFLOAT4X3() = default;
@ -785,7 +786,7 @@ struct XMFLOAT4X3
float& operator() (size_t Row, size_t Column) { return m[Row][Column]; }
};
// 4x3 Matrix: 32 bit floating point components aligned on a 16 byte boundary
// 4x3 Row-major Matrix: 32 bit floating point components aligned on a 16 byte boundary
__declspec(align(16)) struct XMFLOAT4X3A : public XMFLOAT4X3
{
XMFLOAT4X3A() = default;
@ -804,6 +805,60 @@ __declspec(align(16)) struct XMFLOAT4X3A : public XMFLOAT4X3
explicit XMFLOAT4X3A(_In_reads_(12) const float *pArray) : XMFLOAT4X3(pArray) {}
};
//------------------------------------------------------------------------------
// 3x4 Column-major Matrix: 32 bit floating point components
//
// Twelve floats that can be addressed three ways through the union below:
// by name (_11 .. _34), as a 3x4 array (m), or as a flat array (f).
struct XMFLOAT3X4
{
    union
    {
        struct
        {
            float _11, _12, _13, _14;
            float _21, _22, _23, _24;
            float _31, _32, _33, _34;
        };
        float m[3][4];
        float f[12];
    };

    // Construction and assignment are all compiler-generated.
    XMFLOAT3X4() = default;

    XMFLOAT3X4(const XMFLOAT3X4&) = default;
    XMFLOAT3X4(XMFLOAT3X4&&) = default;

    XMFLOAT3X4& operator=(const XMFLOAT3X4&) = default;
    XMFLOAT3X4& operator=(XMFLOAT3X4&&) = default;

    // Element-wise constructor: argument mRC initializes the named member
    // _(R+1)(C+1), e.g. m00 -> _11 and m23 -> _34.
    XM_CONSTEXPR XMFLOAT3X4(
        float m00, float m01, float m02, float m03,
        float m10, float m11, float m12, float m13,
        float m20, float m21, float m22, float m23)
        : _11(m00), _12(m01), _13(m02), _14(m03)
        , _21(m10), _22(m11), _23(m12), _24(m13)
        , _31(m20), _32(m21), _33(m22), _34(m23)
    {
    }

    // Constructs from an array of 12 floats; only declared here.
    explicit XMFLOAT3X4(_In_reads_(12) const float *pArray);

    // Element access as m[Row][Column]; the indices are not range-checked.
    float& operator() (size_t Row, size_t Column) { return m[Row][Column]; }
    float operator() (size_t Row, size_t Column) const { return m[Row][Column]; }
};
// 3x4 Column-major Matrix: 32 bit floating point components aligned on a 16 byte boundary
//
// Adds no data members of its own; every user-facing constructor simply
// forwards to the XMFLOAT3X4 base class.
__declspec(align(16)) struct XMFLOAT3X4A : public XMFLOAT3X4
{
    // Construction and assignment are all compiler-generated.
    XMFLOAT3X4A() = default;

    XMFLOAT3X4A(const XMFLOAT3X4A&) = default;
    XMFLOAT3X4A(XMFLOAT3X4A&&) = default;

    XMFLOAT3X4A& operator=(const XMFLOAT3X4A&) = default;
    XMFLOAT3X4A& operator=(XMFLOAT3X4A&&) = default;

    // Element-wise constructor; arguments are passed through to the base in order.
    XM_CONSTEXPR XMFLOAT3X4A(
        float m00, float m01, float m02, float m03,
        float m10, float m11, float m12, float m13,
        float m20, float m21, float m22, float m23)
        : XMFLOAT3X4(m00, m01, m02, m03,
                     m10, m11, m12, m13,
                     m20, m21, m22, m23)
    {
    }

    // Array constructor; delegates to the base-class array constructor.
    explicit XMFLOAT3X4A(_In_reads_(12) const float *pArray)
        : XMFLOAT3X4(pArray)
    {
    }
};
//------------------------------------------------------------------------------
// 4x4 Matrix: 32 bit floating point components
struct XMFLOAT4X4
@ -923,6 +978,8 @@ XMVECTOR XM_CALLCONV XMLoadUInt4(_In_ const XMUINT4* pSource);
// Matrix load declarations: each takes a pointer to a storage matrix type and
// returns an XMMATRIX. Functions with an "A" suffix take the corresponding
// aligned storage type (XMFLOAT4X3A, XMFLOAT3X4A, XMFLOAT4X4A).
XMMATRIX XM_CALLCONV XMLoadFloat3x3(_In_ const XMFLOAT3X3* pSource);
XMMATRIX XM_CALLCONV XMLoadFloat4x3(_In_ const XMFLOAT4X3* pSource);
XMMATRIX XM_CALLCONV XMLoadFloat4x3A(_In_ const XMFLOAT4X3A* pSource);
XMMATRIX XM_CALLCONV XMLoadFloat3x4(_In_ const XMFLOAT3X4* pSource);
XMMATRIX XM_CALLCONV XMLoadFloat3x4A(_In_ const XMFLOAT3X4A* pSource);
XMMATRIX XM_CALLCONV XMLoadFloat4x4(_In_ const XMFLOAT4X4* pSource);
XMMATRIX XM_CALLCONV XMLoadFloat4x4A(_In_ const XMFLOAT4X4A* pSource);
@ -959,6 +1016,8 @@ void XM_CALLCONV XMStoreUInt4(_Out_ XMUINT4* pDestination, _In_ FXMVE
// Matrix store declarations: each writes the matrix M into the storage object
// behind pDestination. Functions with an "A" suffix take the corresponding
// aligned storage type (XMFLOAT4X3A, XMFLOAT3X4A, XMFLOAT4X4A).
void XM_CALLCONV XMStoreFloat3x3(_Out_ XMFLOAT3X3* pDestination, _In_ FXMMATRIX M);
void XM_CALLCONV XMStoreFloat4x3(_Out_ XMFLOAT4X3* pDestination, _In_ FXMMATRIX M);
void XM_CALLCONV XMStoreFloat4x3A(_Out_ XMFLOAT4X3A* pDestination, _In_ FXMMATRIX M);
void XM_CALLCONV XMStoreFloat3x4(_Out_ XMFLOAT3X4* pDestination, _In_ FXMMATRIX M);
void XM_CALLCONV XMStoreFloat3x4A(_Out_ XMFLOAT3X4A* pDestination, _In_ FXMMATRIX M);
void XM_CALLCONV XMStoreFloat4x4(_Out_ XMFLOAT4X4* pDestination, _In_ FXMMATRIX M);
void XM_CALLCONV XMStoreFloat4x4A(_Out_ XMFLOAT4X4A* pDestination, _In_ FXMMATRIX M);

View File

@ -1000,6 +1000,169 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A
#endif
}
//------------------------------------------------------------------------------
// Loads an XMFLOAT3X4 into an XMMATRIX, transposing on the way in.
//
// Result row i is (m[0][i], m[1][i], m[2][i], w) where w is 0 for rows 0-2
// and 1 for row 3. pSource must not be null (checked with assert only).
_Use_decl_annotations_
inline XMMATRIX XM_CALLCONV XMLoadFloat3x4
(
    const XMFLOAT3X4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    for (size_t row = 0; row < 4; ++row)
    {
        M.r[row].vector4_f32[0] = pSource->m[0][row];
        M.r[row].vector4_f32[1] = pSource->m[1][row];
        M.r[row].vector4_f32[2] = pSource->m[2][row];
        // Only the last row carries a 1 in w; the others get 0.
        M.r[row].vector4_f32[3] = (row == 3) ? 1.0f : 0.0f;
    }
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // De-interleaving load of the first eight floats:
    // firstTwo.val[k] = (m[0][k], m[1][k]).
    float32x2x4_t firstTwo = vld4_f32(&pSource->_11);
    // Remaining four floats: (m[2][0], m[2][1], m[2][2], m[2][3]).
    float32x4_t third = vld1q_f32(&pSource->_31);

    // Split the third group into halves, plus lane-swapped copies so that the
    // wanted element always sits in lane 2 after vcombine.
    float32x2_t thirdLo = vget_low_f32(third);
    float32x2_t thirdLoSwapped = vrev64_f32(thirdLo);
    float32x2_t thirdHi = vget_high_f32(third);
    float32x2_t thirdHiSwapped = vrev64_f32(thirdHi);

    float32x4_t c0 = vcombine_f32(firstTwo.val[0], thirdLo);
    float32x4_t c1 = vcombine_f32(firstTwo.val[1], thirdLoSwapped);
    float32x4_t c2 = vcombine_f32(firstTwo.val[2], thirdHi);
    float32x4_t c3 = vcombine_f32(firstTwo.val[3], thirdHiSwapped);

    XMMATRIX M = {};
    // Lane 3 of c0..c2 holds a leftover element; g_XMMask3 presumably clears
    // it so that w matches the scalar path (0) - confirm against its definition.
    M.r[0] = vandq_u32(c0, g_XMMask3);
    M.r[1] = vandq_u32(c1, g_XMMask3);
    M.r[2] = vandq_u32(c2, g_XMMask3);
    // The last row gets an explicit 1 in w instead.
    M.r[3] = vsetq_lane_f32(1.f, c3, 3);
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    // Three unaligned loads cover the 12 source floats; g_XMIdentityR3 is
    // supplied as a fourth input row so a full 4x4 transpose can be used.
    const XMVECTOR src0 = _mm_loadu_ps(&pSource->_11);
    const XMVECTOR src1 = _mm_loadu_ps(&pSource->_21);
    const XMVECTOR src2 = _mm_loadu_ps(&pSource->_31);
    const XMVECTOR src3 = g_XMIdentityR3;

    // First transpose stage: gather the xy and zw pairs of each row pair.
    // 0.x,0.y,1.x,1.y
    const XMVECTOR xy01 = _mm_shuffle_ps(src0, src1, _MM_SHUFFLE(1, 0, 1, 0));
    // 0.z,0.w,1.z,1.w
    const XMVECTOR zw01 = _mm_shuffle_ps(src0, src1, _MM_SHUFFLE(3, 2, 3, 2));
    // 2.x,2.y,3.x,3.y
    const XMVECTOR xy23 = _mm_shuffle_ps(src2, src3, _MM_SHUFFLE(1, 0, 1, 0));
    // 2.z,2.w,3.z,3.w
    const XMVECTOR zw23 = _mm_shuffle_ps(src2, src3, _MM_SHUFFLE(3, 2, 3, 2));

    // Second stage: pick even/odd lanes to form the transposed rows.
    XMMATRIX M;
    // 0.x,1.x,2.x,3.x
    M.r[0] = _mm_shuffle_ps(xy01, xy23, _MM_SHUFFLE(2, 0, 2, 0));
    // 0.y,1.y,2.y,3.y
    M.r[1] = _mm_shuffle_ps(xy01, xy23, _MM_SHUFFLE(3, 1, 3, 1));
    // 0.z,1.z,2.z,3.z
    M.r[2] = _mm_shuffle_ps(zw01, zw23, _MM_SHUFFLE(2, 0, 2, 0));
    // 0.w,1.w,2.w,3.w
    M.r[3] = _mm_shuffle_ps(zw01, zw23, _MM_SHUFFLE(3, 1, 3, 1));
    return M;
#endif
}
//------------------------------------------------------------------------------
// Loads a 16-byte aligned XMFLOAT3X4A into an XMMATRIX, transposing on the way in.
//
// Result row i is (m[0][i], m[1][i], m[2][i], w) where w is 0 for rows 0-2
// and 1 for row 3. pSource must be non-null and 16-byte aligned; both
// conditions are checked with assert only.
_Use_decl_annotations_
inline XMMATRIX XM_CALLCONV XMLoadFloat3x4A
(
    const XMFLOAT3X4A* pSource
)
{
    assert(pSource);
    assert(((uintptr_t)pSource & 0xF) == 0);
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    for (size_t row = 0; row < 4; ++row)
    {
        M.r[row].vector4_f32[0] = pSource->m[0][row];
        M.r[row].vector4_f32[1] = pSource->m[1][row];
        M.r[row].vector4_f32[2] = pSource->m[2][row];
        // Only the last row carries a 1 in w; the others get 0.
        M.r[row].vector4_f32[3] = (row == 3) ? 1.0f : 0.0f;
    }
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // De-interleaving load of the first eight floats (128-bit alignment hint):
    // firstTwo.val[k] = (m[0][k], m[1][k]).
    float32x2x4_t firstTwo = vld4_f32_ex(&pSource->_11, 128);
    // Remaining four floats: (m[2][0], m[2][1], m[2][2], m[2][3]).
    float32x4_t third = vld1q_f32_ex(&pSource->_31, 128);

    // Split the third group into halves, plus lane-swapped copies so that the
    // wanted element always sits in lane 2 after vcombine.
    float32x2_t thirdLo = vget_low_f32(third);
    float32x2_t thirdLoSwapped = vrev64_f32(thirdLo);
    float32x2_t thirdHi = vget_high_f32(third);
    float32x2_t thirdHiSwapped = vrev64_f32(thirdHi);

    float32x4_t c0 = vcombine_f32(firstTwo.val[0], thirdLo);
    float32x4_t c1 = vcombine_f32(firstTwo.val[1], thirdLoSwapped);
    float32x4_t c2 = vcombine_f32(firstTwo.val[2], thirdHi);
    float32x4_t c3 = vcombine_f32(firstTwo.val[3], thirdHiSwapped);

    XMMATRIX M = {};
    // Lane 3 of c0..c2 holds a leftover element; g_XMMask3 presumably clears
    // it so that w matches the scalar path (0) - confirm against its definition.
    M.r[0] = vandq_u32(c0, g_XMMask3);
    M.r[1] = vandq_u32(c1, g_XMMask3);
    M.r[2] = vandq_u32(c2, g_XMMask3);
    // The last row gets an explicit 1 in w instead.
    M.r[3] = vsetq_lane_f32(1.f, c3, 3);
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    // Three aligned loads cover the 12 source floats; g_XMIdentityR3 is
    // supplied as a fourth input row so a full 4x4 transpose can be used.
    const XMVECTOR src0 = _mm_load_ps(&pSource->_11);
    const XMVECTOR src1 = _mm_load_ps(&pSource->_21);
    const XMVECTOR src2 = _mm_load_ps(&pSource->_31);
    const XMVECTOR src3 = g_XMIdentityR3;

    // First transpose stage: gather the xy and zw pairs of each row pair.
    // 0.x,0.y,1.x,1.y
    const XMVECTOR xy01 = _mm_shuffle_ps(src0, src1, _MM_SHUFFLE(1, 0, 1, 0));
    // 0.z,0.w,1.z,1.w
    const XMVECTOR zw01 = _mm_shuffle_ps(src0, src1, _MM_SHUFFLE(3, 2, 3, 2));
    // 2.x,2.y,3.x,3.y
    const XMVECTOR xy23 = _mm_shuffle_ps(src2, src3, _MM_SHUFFLE(1, 0, 1, 0));
    // 2.z,2.w,3.z,3.w
    const XMVECTOR zw23 = _mm_shuffle_ps(src2, src3, _MM_SHUFFLE(3, 2, 3, 2));

    // Second stage: pick even/odd lanes to form the transposed rows.
    XMMATRIX M;
    // 0.x,1.x,2.x,3.x
    M.r[0] = _mm_shuffle_ps(xy01, xy23, _MM_SHUFFLE(2, 0, 2, 0));
    // 0.y,1.y,2.y,3.y
    M.r[1] = _mm_shuffle_ps(xy01, xy23, _MM_SHUFFLE(3, 1, 3, 1));
    // 0.z,1.z,2.z,3.z
    M.r[2] = _mm_shuffle_ps(zw01, zw23, _MM_SHUFFLE(2, 0, 2, 0));
    // 0.w,1.w,2.w,3.w
    M.r[3] = _mm_shuffle_ps(zw01, zw23, _MM_SHUFFLE(3, 1, 3, 1));
    return M;
#endif
}
//------------------------------------------------------------------------------
_Use_decl_annotations_
inline XMMATRIX XM_CALLCONV XMLoadFloat4x4
@ -1804,6 +1967,125 @@ inline void XM_CALLCONV XMStoreFloat4x3A
#endif
}
//------------------------------------------------------------------------------
// Stores an XMMATRIX into an XMFLOAT3X4, transposing on the way out.
//
// Destination element m[i][j] receives component i of matrix row j, for
// i in 0..2 and j in 0..3. The w components of M are not written anywhere.
// pDestination must not be null (checked with assert only).
_Use_decl_annotations_
inline void XM_CALLCONV XMStoreFloat3x4
(
    XMFLOAT3X4* pDestination,
    FXMMATRIX M
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_)
    for (size_t i = 0; i < 3; ++i)
    {
        for (size_t j = 0; j < 4; ++j)
        {
            pDestination->m[i][j] = M.r[j].vector4_f32[i];
        }
    }
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Two rounds of interleaving (zip) produce the transposed rows.
    // Round 1 pairs rows 0/2 and rows 1/3.
    float32x4x2_t zip02 = vzipq_f32(M.r[0], M.r[2]);
    float32x4x2_t zip13 = vzipq_f32(M.r[1], M.r[3]);

    // Round 2: xyCols.val[0] = all x, xyCols.val[1] = all y,
    // zwCols.val[0] = all z (zwCols.val[1], all w, is not stored).
    float32x4x2_t xyCols = vzipq_f32(zip02.val[0], zip13.val[0]);
    float32x4x2_t zwCols = vzipq_f32(zip02.val[1], zip13.val[1]);

    vst1q_f32(&pDestination->m[0][0], xyCols.val[0]);
    vst1q_f32(&pDestination->m[1][0], xyCols.val[1]);
    vst1q_f32(&pDestination->m[2][0], zwCols.val[0]);
#elif defined(_XM_SSE_INTRINSICS_)
    // First transpose stage: gather the xy and zw pairs of each row pair.
    // 0.x,0.y,1.x,1.y
    const XMVECTOR xy01 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0));
    // 0.z,0.w,1.z,1.w
    const XMVECTOR zw01 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2));
    // 2.x,2.y,3.x,3.y
    const XMVECTOR xy23 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0));
    // 2.z,2.w,3.z,3.w
    const XMVECTOR zw23 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2));

    // Second stage: only the x, y and z columns are needed.
    // 0.x,1.x,2.x,3.x
    const XMVECTOR colX = _mm_shuffle_ps(xy01, xy23, _MM_SHUFFLE(2, 0, 2, 0));
    // 0.y,1.y,2.y,3.y
    const XMVECTOR colY = _mm_shuffle_ps(xy01, xy23, _MM_SHUFFLE(3, 1, 3, 1));
    // 0.z,1.z,2.z,3.z
    const XMVECTOR colZ = _mm_shuffle_ps(zw01, zw23, _MM_SHUFFLE(2, 0, 2, 0));

    // Unaligned stores: XMFLOAT3X4 carries no alignment requirement here.
    _mm_storeu_ps(&pDestination->m[0][0], colX);
    _mm_storeu_ps(&pDestination->m[1][0], colY);
    _mm_storeu_ps(&pDestination->m[2][0], colZ);
#endif
}
//------------------------------------------------------------------------------
// Stores an XMMATRIX into a 16-byte aligned XMFLOAT3X4A, transposing on the way out.
//
// Destination element m[i][j] receives component i of matrix row j, for
// i in 0..2 and j in 0..3. The w components of M are not written anywhere.
// pDestination must be non-null and 16-byte aligned; both conditions are
// checked with assert only.
_Use_decl_annotations_
inline void XM_CALLCONV XMStoreFloat3x4A
(
    XMFLOAT3X4A* pDestination,
    FXMMATRIX M
)
{
    assert(pDestination);
    assert(((uintptr_t)pDestination & 0xF) == 0);
#if defined(_XM_NO_INTRINSICS_)
    for (size_t i = 0; i < 3; ++i)
    {
        for (size_t j = 0; j < 4; ++j)
        {
            pDestination->m[i][j] = M.r[j].vector4_f32[i];
        }
    }
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Two rounds of interleaving (zip) produce the transposed rows.
    // Round 1 pairs rows 0/2 and rows 1/3.
    float32x4x2_t zip02 = vzipq_f32(M.r[0], M.r[2]);
    float32x4x2_t zip13 = vzipq_f32(M.r[1], M.r[3]);

    // Round 2: xyCols.val[0] = all x, xyCols.val[1] = all y,
    // zwCols.val[0] = all z (zwCols.val[1], all w, is not stored).
    float32x4x2_t xyCols = vzipq_f32(zip02.val[0], zip13.val[0]);
    float32x4x2_t zwCols = vzipq_f32(zip02.val[1], zip13.val[1]);

    // Stores carry a 128-bit alignment hint.
    vst1q_f32_ex(&pDestination->m[0][0], xyCols.val[0], 128);
    vst1q_f32_ex(&pDestination->m[1][0], xyCols.val[1], 128);
    vst1q_f32_ex(&pDestination->m[2][0], zwCols.val[0], 128);
#elif defined(_XM_SSE_INTRINSICS_)
    // First transpose stage: gather the xy and zw pairs of each row pair.
    // 0.x,0.y,1.x,1.y
    const XMVECTOR xy01 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0));
    // 0.z,0.w,1.z,1.w
    const XMVECTOR zw01 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2));
    // 2.x,2.y,3.x,3.y
    const XMVECTOR xy23 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0));
    // 2.z,2.w,3.z,3.w
    const XMVECTOR zw23 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2));

    // Second stage: only the x, y and z columns are needed.
    // 0.x,1.x,2.x,3.x
    const XMVECTOR colX = _mm_shuffle_ps(xy01, xy23, _MM_SHUFFLE(2, 0, 2, 0));
    // 0.y,1.y,2.y,3.y
    const XMVECTOR colY = _mm_shuffle_ps(xy01, xy23, _MM_SHUFFLE(3, 1, 3, 1));
    // 0.z,1.z,2.z,3.z
    const XMVECTOR colZ = _mm_shuffle_ps(zw01, zw23, _MM_SHUFFLE(2, 0, 2, 0));

    // Aligned stores; the alignment is asserted at the top of the function.
    _mm_store_ps(&pDestination->m[0][0], colX);
    _mm_store_ps(&pDestination->m[1][0], colY);
    _mm_store_ps(&pDestination->m[2][0], colZ);
#endif
}
//------------------------------------------------------------------------------
_Use_decl_annotations_
inline void XM_CALLCONV XMStoreFloat4x4

View File

@ -3244,6 +3244,37 @@ inline XMFLOAT4X3::XMFLOAT4X3
m[3][2] = pArray[11];
}
/****************************************************************************
*
* XMFLOAT3X4 operators
*
****************************************************************************/
//------------------------------------------------------------------------------
// Constructs an XMFLOAT3X4 from 12 consecutive floats.
//
// pArray[row * 4 + column] is copied into m[row][column], so the input is
// consumed in the same order as the matrix is laid out in memory.
// pArray must not be null (checked with assert only).
_Use_decl_annotations_
inline XMFLOAT3X4::XMFLOAT3X4
(
    const float* pArray
)
{
    assert(pArray != nullptr);

    for (size_t row = 0; row < 3; ++row)
    {
        for (size_t column = 0; column < 4; ++column)
        {
            m[row][column] = pArray[row * 4 + column];
        }
    }
}
/****************************************************************************
*
* XMFLOAT4X4 operators