Added SIMD integer operations optimizations

This commit is contained in:
Christophe Riccio 2016-05-30 15:38:47 +02:00
parent 1bbc2935a2
commit 84caa1092f
2 changed files with 55 additions and 22 deletions

View File

@ -21,40 +21,36 @@ namespace detail
return Bits >= sizeof(T) * 8 ? ~static_cast<T>(0) : (static_cast<T>(1) << Bits) - static_cast<T>(1);
}
template <bool EXEC = false>
template <typename T, glm::precision P, template <typename, glm::precision> class vecType, bool EXEC = false>
struct compute_bitfieldReverseStep
{
template <typename T, glm::precision P, template <class, glm::precision> class vecType>
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T, T)
{
return v;
}
};
template <>
struct compute_bitfieldReverseStep<true>
template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
struct compute_bitfieldReverseStep<T, P, vecType, true>
{
template <typename T, glm::precision P, template <class, glm::precision> class vecType>
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Mask, T Shift)
{
return (v & Mask) << Shift | (v & (~Mask)) >> Shift;
}
};
template <bool EXEC = false>
template <typename T, glm::precision P, template <typename, glm::precision> class vecType, bool EXEC = false>
struct compute_bitfieldBitCountStep
{
template <typename T, glm::precision P, template <class, glm::precision> class vecType>
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T, T)
{
return v;
}
};
template <>
struct compute_bitfieldBitCountStep<true>
template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
struct compute_bitfieldBitCountStep<T, P, vecType, true>
{
template <typename T, glm::precision P, template <class, glm::precision> class vecType>
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Mask, T Shift)
{
return (v & Mask) + ((v >> Shift) & Mask);
@ -293,12 +289,12 @@ namespace detail
GLM_FUNC_QUALIFIER vecType<T, P> bitfieldReverse(vecType<T, P> const & v)
{
vecType<T, P> x(v);
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 2>::call(x, T(0x5555555555555555ull), static_cast<T>( 1));
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 4>::call(x, T(0x3333333333333333ull), static_cast<T>( 2));
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 8>::call(x, T(0x0F0F0F0F0F0F0F0Full), static_cast<T>( 4));
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 16>::call(x, T(0x00FF00FF00FF00FFull), static_cast<T>( 8));
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 32>::call(x, T(0x0000FFFF0000FFFFull), static_cast<T>(16));
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 64>::call(x, T(0x00000000FFFFFFFFull), static_cast<T>(32));
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 2>::call(x, T(0x5555555555555555ull), static_cast<T>( 1));
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 4>::call(x, T(0x3333333333333333ull), static_cast<T>( 2));
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 8>::call(x, T(0x0F0F0F0F0F0F0F0Full), static_cast<T>( 4));
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 16>::call(x, T(0x00FF00FF00FF00FFull), static_cast<T>( 8));
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 32>::call(x, T(0x0000FFFF0000FFFFull), static_cast<T>(16));
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 64>::call(x, T(0x00000000FFFFFFFFull), static_cast<T>(32));
return x;
}
@ -313,12 +309,12 @@ namespace detail
GLM_FUNC_QUALIFIER vecType<int, P> bitCount(vecType<T, P> const & v)
{
vecType<typename detail::make_unsigned<T>::type, P> x(*reinterpret_cast<vecType<typename detail::make_unsigned<T>::type, P> const *>(&v));
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 2>::call(x, typename detail::make_unsigned<T>::type(0x5555555555555555ull), typename detail::make_unsigned<T>::type( 1));
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 4>::call(x, typename detail::make_unsigned<T>::type(0x3333333333333333ull), typename detail::make_unsigned<T>::type( 2));
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 8>::call(x, typename detail::make_unsigned<T>::type(0x0F0F0F0F0F0F0F0Full), typename detail::make_unsigned<T>::type( 4));
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 16>::call(x, typename detail::make_unsigned<T>::type(0x00FF00FF00FF00FFull), typename detail::make_unsigned<T>::type( 8));
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 32>::call(x, typename detail::make_unsigned<T>::type(0x0000FFFF0000FFFFull), typename detail::make_unsigned<T>::type(16));
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 64>::call(x, typename detail::make_unsigned<T>::type(0x00000000FFFFFFFFull), typename detail::make_unsigned<T>::type(32));
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 2>::call(x, typename detail::make_unsigned<T>::type(0x5555555555555555ull), typename detail::make_unsigned<T>::type( 1));
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 4>::call(x, typename detail::make_unsigned<T>::type(0x3333333333333333ull), typename detail::make_unsigned<T>::type( 2));
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 8>::call(x, typename detail::make_unsigned<T>::type(0x0F0F0F0F0F0F0F0Full), typename detail::make_unsigned<T>::type( 4));
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 16>::call(x, typename detail::make_unsigned<T>::type(0x00FF00FF00FF00FFull), typename detail::make_unsigned<T>::type( 8));
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 32>::call(x, typename detail::make_unsigned<T>::type(0x0000FFFF0000FFFFull), typename detail::make_unsigned<T>::type(16));
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 64>::call(x, typename detail::make_unsigned<T>::type(0x00000000FFFFFFFFull), typename detail::make_unsigned<T>::type(32));
return vecType<int, P>(x);
}

View File

@ -8,7 +8,44 @@
namespace glm{
namespace detail
{
template <glm::precision P>
struct compute_bitfieldReverseStep<uint32, P, tvec4, true>
{
GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & v, uint32 Mask, uint32 Shift)
{
__m128i const set0 = v.data;
__m128i const set1 = _mm_set1_epi32(Mask);
__m128i const and1 = _mm_and_si128(set0, set1);
__m128i const sft1 = _mm_slli_epi32(and1, Shift);
__m128i const set2 = _mm_andnot_si128(set0, _mm_set1_epi32(-1));
__m128i const and2 = _mm_and_si128(set0, set2);
__m128i const sft2 = _mm_srai_epi32(and2, Shift);
__m128i const or0 = _mm_or_si128(sft1, sft2);
return or0;
}
};
template <glm::precision P>
struct compute_bitfieldBitCountStep<uint32, P, tvec4, true>
{
template <glm::precision P>
GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & v, uint32 Mask, uint32 Shift)
{
__m128i const set0 = v.data;
__m128i const set1 = _mm_set1_epi32(Mask);
__m128i const and0 = _mm_and_si128(set0, set1);
__m128i const sft0 = _mm_slli_epi32(set0, Shift);
__m128i const and1 = _mm_and_si128(sft0, set1);
__m128i const add0 = _mm_add_epi32(and0, and1);
return add0;
}
};
}//namespace detail
# if GLM_ARCH & GLM_ARCH_AVX_BIT