Added SIMD integer operations optimizations
This commit is contained in:
parent
1bbc2935a2
commit
84caa1092f
@ -21,40 +21,36 @@ namespace detail
|
||||
return Bits >= sizeof(T) * 8 ? ~static_cast<T>(0) : (static_cast<T>(1) << Bits) - static_cast<T>(1);
|
||||
}
|
||||
|
||||
template <bool EXEC = false>
|
||||
template <typename T, glm::precision P, template <typename, glm::precision> class vecType, bool EXEC = false>
|
||||
struct compute_bitfieldReverseStep
|
||||
{
|
||||
template <typename T, glm::precision P, template <class, glm::precision> class vecType>
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T, T)
|
||||
{
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct compute_bitfieldReverseStep<true>
|
||||
template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
|
||||
struct compute_bitfieldReverseStep<T, P, vecType, true>
|
||||
{
|
||||
template <typename T, glm::precision P, template <class, glm::precision> class vecType>
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Mask, T Shift)
|
||||
{
|
||||
return (v & Mask) << Shift | (v & (~Mask)) >> Shift;
|
||||
}
|
||||
};
|
||||
|
||||
template <bool EXEC = false>
|
||||
template <typename T, glm::precision P, template <typename, glm::precision> class vecType, bool EXEC = false>
|
||||
struct compute_bitfieldBitCountStep
|
||||
{
|
||||
template <typename T, glm::precision P, template <class, glm::precision> class vecType>
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T, T)
|
||||
{
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct compute_bitfieldBitCountStep<true>
|
||||
template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
|
||||
struct compute_bitfieldBitCountStep<T, P, vecType, true>
|
||||
{
|
||||
template <typename T, glm::precision P, template <class, glm::precision> class vecType>
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Mask, T Shift)
|
||||
{
|
||||
return (v & Mask) + ((v >> Shift) & Mask);
|
||||
@ -293,12 +289,12 @@ namespace detail
|
||||
GLM_FUNC_QUALIFIER vecType<T, P> bitfieldReverse(vecType<T, P> const & v)
|
||||
{
|
||||
vecType<T, P> x(v);
|
||||
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 2>::call(x, T(0x5555555555555555ull), static_cast<T>( 1));
|
||||
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 4>::call(x, T(0x3333333333333333ull), static_cast<T>( 2));
|
||||
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 8>::call(x, T(0x0F0F0F0F0F0F0F0Full), static_cast<T>( 4));
|
||||
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 16>::call(x, T(0x00FF00FF00FF00FFull), static_cast<T>( 8));
|
||||
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 32>::call(x, T(0x0000FFFF0000FFFFull), static_cast<T>(16));
|
||||
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 64>::call(x, T(0x00000000FFFFFFFFull), static_cast<T>(32));
|
||||
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 2>::call(x, T(0x5555555555555555ull), static_cast<T>( 1));
|
||||
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 4>::call(x, T(0x3333333333333333ull), static_cast<T>( 2));
|
||||
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 8>::call(x, T(0x0F0F0F0F0F0F0F0Full), static_cast<T>( 4));
|
||||
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 16>::call(x, T(0x00FF00FF00FF00FFull), static_cast<T>( 8));
|
||||
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 32>::call(x, T(0x0000FFFF0000FFFFull), static_cast<T>(16));
|
||||
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 64>::call(x, T(0x00000000FFFFFFFFull), static_cast<T>(32));
|
||||
return x;
|
||||
}
|
||||
|
||||
@ -313,12 +309,12 @@ namespace detail
|
||||
GLM_FUNC_QUALIFIER vecType<int, P> bitCount(vecType<T, P> const & v)
|
||||
{
|
||||
vecType<typename detail::make_unsigned<T>::type, P> x(*reinterpret_cast<vecType<typename detail::make_unsigned<T>::type, P> const *>(&v));
|
||||
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 2>::call(x, typename detail::make_unsigned<T>::type(0x5555555555555555ull), typename detail::make_unsigned<T>::type( 1));
|
||||
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 4>::call(x, typename detail::make_unsigned<T>::type(0x3333333333333333ull), typename detail::make_unsigned<T>::type( 2));
|
||||
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 8>::call(x, typename detail::make_unsigned<T>::type(0x0F0F0F0F0F0F0F0Full), typename detail::make_unsigned<T>::type( 4));
|
||||
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 16>::call(x, typename detail::make_unsigned<T>::type(0x00FF00FF00FF00FFull), typename detail::make_unsigned<T>::type( 8));
|
||||
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 32>::call(x, typename detail::make_unsigned<T>::type(0x0000FFFF0000FFFFull), typename detail::make_unsigned<T>::type(16));
|
||||
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 64>::call(x, typename detail::make_unsigned<T>::type(0x00000000FFFFFFFFull), typename detail::make_unsigned<T>::type(32));
|
||||
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 2>::call(x, typename detail::make_unsigned<T>::type(0x5555555555555555ull), typename detail::make_unsigned<T>::type( 1));
|
||||
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 4>::call(x, typename detail::make_unsigned<T>::type(0x3333333333333333ull), typename detail::make_unsigned<T>::type( 2));
|
||||
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 8>::call(x, typename detail::make_unsigned<T>::type(0x0F0F0F0F0F0F0F0Full), typename detail::make_unsigned<T>::type( 4));
|
||||
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 16>::call(x, typename detail::make_unsigned<T>::type(0x00FF00FF00FF00FFull), typename detail::make_unsigned<T>::type( 8));
|
||||
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 32>::call(x, typename detail::make_unsigned<T>::type(0x0000FFFF0000FFFFull), typename detail::make_unsigned<T>::type(16));
|
||||
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 64>::call(x, typename detail::make_unsigned<T>::type(0x00000000FFFFFFFFull), typename detail::make_unsigned<T>::type(32));
|
||||
return vecType<int, P>(x);
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,44 @@
|
||||
namespace glm{
|
||||
namespace detail
|
||||
{
|
||||
template <glm::precision P>
|
||||
struct compute_bitfieldReverseStep<uint32, P, tvec4, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & v, uint32 Mask, uint32 Shift)
|
||||
{
|
||||
__m128i const set0 = v.data;
|
||||
|
||||
__m128i const set1 = _mm_set1_epi32(Mask);
|
||||
__m128i const and1 = _mm_and_si128(set0, set1);
|
||||
__m128i const sft1 = _mm_slli_epi32(and1, Shift);
|
||||
|
||||
__m128i const set2 = _mm_andnot_si128(set0, _mm_set1_epi32(-1));
|
||||
__m128i const and2 = _mm_and_si128(set0, set2);
|
||||
__m128i const sft2 = _mm_srai_epi32(and2, Shift);
|
||||
|
||||
__m128i const or0 = _mm_or_si128(sft1, sft2);
|
||||
|
||||
return or0;
|
||||
}
|
||||
};
|
||||
|
||||
template <glm::precision P>
|
||||
struct compute_bitfieldBitCountStep<uint32, P, tvec4, true>
|
||||
{
|
||||
template <glm::precision P>
|
||||
GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & v, uint32 Mask, uint32 Shift)
|
||||
{
|
||||
__m128i const set0 = v.data;
|
||||
|
||||
__m128i const set1 = _mm_set1_epi32(Mask);
|
||||
__m128i const and0 = _mm_and_si128(set0, set1);
|
||||
__m128i const sft0 = _mm_slli_epi32(set0, Shift);
|
||||
__m128i const and1 = _mm_and_si128(sft0, set1);
|
||||
__m128i const add0 = _mm_add_epi32(and0, and1);
|
||||
|
||||
return add0;
|
||||
}
|
||||
};
|
||||
}//namespace detail
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||
|
Loading…
Reference in New Issue
Block a user