Added SIMD integer operations optimizations
This commit is contained in:
parent
1bbc2935a2
commit
84caa1092f
@ -21,40 +21,36 @@ namespace detail
|
|||||||
return Bits >= sizeof(T) * 8 ? ~static_cast<T>(0) : (static_cast<T>(1) << Bits) - static_cast<T>(1);
|
return Bits >= sizeof(T) * 8 ? ~static_cast<T>(0) : (static_cast<T>(1) << Bits) - static_cast<T>(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Primary template of compute_bitfieldReverseStep (selected when EXEC is false).
// call() ignores its two unnamed T arguments and returns v unchanged, i.e. it is
// the "skip this step" case. Two variants of the declaration are interleaved
// below: the first takes only the bool EXEC parameter and makes call() a member
// template over T, P and vecType; the second moves T, P and vecType onto the
// struct itself so call() is a plain static member function.
template <bool EXEC = false>
|
template <typename T, glm::precision P, template <typename, glm::precision> class vecType, bool EXEC = false>
|
||||||
struct compute_bitfieldReverseStep
|
struct compute_bitfieldReverseStep
|
||||||
{
|
{
|
||||||
template <typename T, glm::precision P, template <class, glm::precision> class vecType>
|
|
||||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T, T)
|
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T, T)
|
||||||
{
|
{
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Specialization of compute_bitfieldReverseStep for EXEC == true.
// call() performs one swap step: the bits of v selected by Mask are shifted
// left by Shift, the bits selected by ~Mask are shifted right by Shift, and the
// two halves are OR-ed together. Two variants are interleaved below: a full
// specialization on <true> whose call() is a member template, and a partial
// specialization on <T, P, vecType, true> whose call() is a plain static member.
template <>
|
template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
|
||||||
struct compute_bitfieldReverseStep<true>
|
struct compute_bitfieldReverseStep<T, P, vecType, true>
|
||||||
{
|
{
|
||||||
template <typename T, glm::precision P, template <class, glm::precision> class vecType>
|
|
||||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Mask, T Shift)
|
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Mask, T Shift)
|
||||||
{
|
{
|
||||||
return (v & Mask) << Shift | (v & (~Mask)) >> Shift;
|
return (v & Mask) << Shift | (v & (~Mask)) >> Shift;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Primary template of compute_bitfieldBitCountStep (selected when EXEC is false).
// call() ignores its two unnamed T arguments and returns v unchanged, i.e. it is
// the "skip this step" case. Two variants of the declaration are interleaved
// below: the first takes only the bool EXEC parameter and makes call() a member
// template over T, P and vecType; the second moves T, P and vecType onto the
// struct itself so call() is a plain static member function.
template <bool EXEC = false>
|
template <typename T, glm::precision P, template <typename, glm::precision> class vecType, bool EXEC = false>
|
||||||
struct compute_bitfieldBitCountStep
|
struct compute_bitfieldBitCountStep
|
||||||
{
|
{
|
||||||
template <typename T, glm::precision P, template <class, glm::precision> class vecType>
|
|
||||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T, T)
|
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T, T)
|
||||||
{
|
{
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
};
|
||||||
|
|
||||||
template <>
|
template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
|
||||||
struct compute_bitfieldBitCountStep<true>
|
struct compute_bitfieldBitCountStep<T, P, vecType, true>
|
||||||
{
|
{
|
||||||
template <typename T, glm::precision P, template <class, glm::precision> class vecType>
|
|
||||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Mask, T Shift)
|
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Mask, T Shift)
|
||||||
{
|
{
|
||||||
return (v & Mask) + ((v >> Shift) & Mask);
|
return (v & Mask) + ((v >> Shift) & Mask);
|
||||||
@ -293,12 +289,12 @@ namespace detail
|
|||||||
GLM_FUNC_QUALIFIER vecType<T, P> bitfieldReverse(vecType<T, P> const & v)
|
GLM_FUNC_QUALIFIER vecType<T, P> bitfieldReverse(vecType<T, P> const & v)
|
||||||
{
|
{
|
||||||
vecType<T, P> x(v);
|
vecType<T, P> x(v);
|
||||||
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 2>::call(x, T(0x5555555555555555ull), static_cast<T>( 1));
|
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 2>::call(x, T(0x5555555555555555ull), static_cast<T>( 1));
|
||||||
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 4>::call(x, T(0x3333333333333333ull), static_cast<T>( 2));
|
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 4>::call(x, T(0x3333333333333333ull), static_cast<T>( 2));
|
||||||
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 8>::call(x, T(0x0F0F0F0F0F0F0F0Full), static_cast<T>( 4));
|
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 8>::call(x, T(0x0F0F0F0F0F0F0F0Full), static_cast<T>( 4));
|
||||||
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 16>::call(x, T(0x00FF00FF00FF00FFull), static_cast<T>( 8));
|
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 16>::call(x, T(0x00FF00FF00FF00FFull), static_cast<T>( 8));
|
||||||
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 32>::call(x, T(0x0000FFFF0000FFFFull), static_cast<T>(16));
|
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 32>::call(x, T(0x0000FFFF0000FFFFull), static_cast<T>(16));
|
||||||
x = detail::compute_bitfieldReverseStep<sizeof(T) * 8 >= 64>::call(x, T(0x00000000FFFFFFFFull), static_cast<T>(32));
|
x = detail::compute_bitfieldReverseStep<T, P, vecType, sizeof(T) * 8 >= 64>::call(x, T(0x00000000FFFFFFFFull), static_cast<T>(32));
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -313,12 +309,12 @@ namespace detail
|
|||||||
GLM_FUNC_QUALIFIER vecType<int, P> bitCount(vecType<T, P> const & v)
|
GLM_FUNC_QUALIFIER vecType<int, P> bitCount(vecType<T, P> const & v)
|
||||||
{
|
{
|
||||||
vecType<typename detail::make_unsigned<T>::type, P> x(*reinterpret_cast<vecType<typename detail::make_unsigned<T>::type, P> const *>(&v));
|
vecType<typename detail::make_unsigned<T>::type, P> x(*reinterpret_cast<vecType<typename detail::make_unsigned<T>::type, P> const *>(&v));
|
||||||
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 2>::call(x, typename detail::make_unsigned<T>::type(0x5555555555555555ull), typename detail::make_unsigned<T>::type( 1));
|
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 2>::call(x, typename detail::make_unsigned<T>::type(0x5555555555555555ull), typename detail::make_unsigned<T>::type( 1));
|
||||||
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 4>::call(x, typename detail::make_unsigned<T>::type(0x3333333333333333ull), typename detail::make_unsigned<T>::type( 2));
|
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 4>::call(x, typename detail::make_unsigned<T>::type(0x3333333333333333ull), typename detail::make_unsigned<T>::type( 2));
|
||||||
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 8>::call(x, typename detail::make_unsigned<T>::type(0x0F0F0F0F0F0F0F0Full), typename detail::make_unsigned<T>::type( 4));
|
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 8>::call(x, typename detail::make_unsigned<T>::type(0x0F0F0F0F0F0F0F0Full), typename detail::make_unsigned<T>::type( 4));
|
||||||
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 16>::call(x, typename detail::make_unsigned<T>::type(0x00FF00FF00FF00FFull), typename detail::make_unsigned<T>::type( 8));
|
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 16>::call(x, typename detail::make_unsigned<T>::type(0x00FF00FF00FF00FFull), typename detail::make_unsigned<T>::type( 8));
|
||||||
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 32>::call(x, typename detail::make_unsigned<T>::type(0x0000FFFF0000FFFFull), typename detail::make_unsigned<T>::type(16));
|
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 32>::call(x, typename detail::make_unsigned<T>::type(0x0000FFFF0000FFFFull), typename detail::make_unsigned<T>::type(16));
|
||||||
x = detail::compute_bitfieldBitCountStep<sizeof(T) * 8 >= 64>::call(x, typename detail::make_unsigned<T>::type(0x00000000FFFFFFFFull), typename detail::make_unsigned<T>::type(32));
|
x = detail::compute_bitfieldBitCountStep<T, P, vecType, sizeof(T) * 8 >= 64>::call(x, typename detail::make_unsigned<T>::type(0x00000000FFFFFFFFull), typename detail::make_unsigned<T>::type(32));
|
||||||
return vecType<int, P>(x);
|
return vecType<int, P>(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,7 +8,44 @@
|
|||||||
namespace glm{
|
namespace glm{
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
|
template <glm::precision P>
|
||||||
|
struct compute_bitfieldReverseStep<uint32, P, tvec4, true>
|
||||||
|
{
|
||||||
|
GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & v, uint32 Mask, uint32 Shift)
|
||||||
|
{
|
||||||
|
__m128i const set0 = v.data;
|
||||||
|
|
||||||
|
__m128i const set1 = _mm_set1_epi32(Mask);
|
||||||
|
__m128i const and1 = _mm_and_si128(set0, set1);
|
||||||
|
__m128i const sft1 = _mm_slli_epi32(and1, Shift);
|
||||||
|
|
||||||
|
__m128i const set2 = _mm_andnot_si128(set0, _mm_set1_epi32(-1));
|
||||||
|
__m128i const and2 = _mm_and_si128(set0, set2);
|
||||||
|
__m128i const sft2 = _mm_srai_epi32(and2, Shift);
|
||||||
|
|
||||||
|
__m128i const or0 = _mm_or_si128(sft1, sft2);
|
||||||
|
|
||||||
|
return or0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <glm::precision P>
|
||||||
|
struct compute_bitfieldBitCountStep<uint32, P, tvec4, true>
|
||||||
|
{
|
||||||
|
template <glm::precision P>
|
||||||
|
GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & v, uint32 Mask, uint32 Shift)
|
||||||
|
{
|
||||||
|
__m128i const set0 = v.data;
|
||||||
|
|
||||||
|
__m128i const set1 = _mm_set1_epi32(Mask);
|
||||||
|
__m128i const and0 = _mm_and_si128(set0, set1);
|
||||||
|
__m128i const sft0 = _mm_slli_epi32(set0, Shift);
|
||||||
|
__m128i const and1 = _mm_and_si128(sft0, set1);
|
||||||
|
__m128i const add0 = _mm_add_epi32(and0, and1);
|
||||||
|
|
||||||
|
return add0;
|
||||||
|
}
|
||||||
|
};
|
||||||
}//namespace detail
|
}//namespace detail
|
||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||||
|
Loading…
Reference in New Issue
Block a user