Added bitwise inverse SIMD optimization. Factored out the bitwise optimization code.

This commit is contained in:
Christophe Riccio 2016-05-27 23:03:38 +02:00
parent 4797ea9540
commit f577611328
3 changed files with 54 additions and 43 deletions

View File

@ -4,6 +4,24 @@
namespace glm{ namespace glm{
namespace detail namespace detail
{ {
// Compile-time flag telling whether T is one of the 32-bit integer types
// (uint32 or int32): is_int32<T>::value is 1 for those two types, 0 otherwise.
//
// The flag is forwarded as the `int IsInt32` template argument of the
// compute_vec4_* helpers, whose optimized specializations are keyed on `true`,
// i.e. the integer 1. The flag therefore has to be exactly 1: a merely truthy
// value such as ~0 (-1) never compares equal to `true`, so the specializations
// would silently never be selected and the generic fallback would always run.
//
// NOTE(review): with the flag matching, every helper that dispatches on it
// will start using its `true` specialization -- verify each of them.
template <typename T>
struct is_int32
{
	// Any type other than uint32 / int32.
	enum test {value = 0};
};

template <>
struct is_int32<uint32>
{
	enum test {value = 1};
};

template <>
struct is_int32<int32>
{
	enum test {value = 1};
};
template <typename T, precision P> template <typename T, precision P>
struct compute_vec4_add struct compute_vec4_add
{ {
@ -67,7 +85,7 @@ namespace detail
} }
}; };
template <typename T, precision P> template <typename T, precision P, int IsInt32>
struct compute_vec4_xor struct compute_vec4_xor
{ {
static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b) static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -76,7 +94,7 @@ namespace detail
} }
}; };
template <typename T, precision P> template <typename T, precision P, int IsInt32>
struct compute_vec4_shift_left struct compute_vec4_shift_left
{ {
static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b) static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -85,7 +103,7 @@ namespace detail
} }
}; };
template <typename T, precision P> template <typename T, precision P, int IsInt32>
struct compute_vec4_shift_right struct compute_vec4_shift_right
{ {
static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b) static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -93,6 +111,15 @@ namespace detail
return tvec4<T, P>(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w); return tvec4<T, P>(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w);
} }
}; };
// Generic (fallback) implementation of the bitwise complement of a tvec4.
// IsInt32 is not used by this primary template; it is only there so that
// specializations can be selected on it.
template <typename T, precision P, int IsInt32>
struct compute_vec4_logical_not
{
	// Returns a vector whose x, y, z and w components are the bitwise
	// complement (~) of the corresponding components of v.
	static tvec4<T, P> call(tvec4<T, P> const & v)
	{
		tvec4<T, P> const Inverted(
			~v.x,
			~v.y,
			~v.z,
			~v.w);
		return Inverted;
	}
};
}//namespace detail }//namespace detail
// -- Implicit basic constructors -- // -- Implicit basic constructors --
@ -883,7 +910,7 @@ namespace detail
template <typename T, precision P> template <typename T, precision P>
GLM_FUNC_QUALIFIER tvec4<T, P> operator~(tvec4<T, P> const & v) GLM_FUNC_QUALIFIER tvec4<T, P> operator~(tvec4<T, P> const & v)
{ {
return tvec4<T, P>(~v.x, ~v.y, ~v.z, ~v.w); return detail::compute_vec4_logical_not<T, P, detail::is_int32<T>::value>::call(v);
} }
// -- Boolean operators -- // -- Boolean operators --

View File

@ -105,68 +105,46 @@ namespace detail
} }
}; };
template <precision P> template <typename T, precision P>
struct compute_vec4_xor<int32, P> struct compute_vec4_xor<T, P, true>
{ {
static tvec4<int32, P> call(tvec4<int32, P> const& a, tvec4<int32, P> const& b) static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
{ {
tvec4<int32, P> Result(uninitialize); tvec4<T, P> Result(uninitialize);
Result.data = _mm_xor_si128(a.data, b.data); Result.data = _mm_xor_si128(a.data, b.data);
return Result; return Result;
} }
}; };
template <precision P> template <typename T, precision P>
struct compute_vec4_xor<uint32, P> struct compute_vec4_shift_left<T, P, true>
{ {
static tvec4<uint32, P> call(tvec4<uint32, P> const& a, tvec4<uint32, P> const& b) static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
{ {
tvec4<uint32, P> Result(uninitialize); tvec4<T, P> Result(uninitialize);
Result.data = _mm_xor_si128(a.data, b.data);
return Result;
}
};
template <precision P>
struct compute_vec4_shift_left<int32, P>
{
static tvec4<int32, P> call(tvec4<int32, P> const& a, tvec4<int32, P> const& b)
{
tvec4<int32, P> Result(uninitialize);
Result.data = _mm_sll_epi32(a.data, b.data); Result.data = _mm_sll_epi32(a.data, b.data);
return Result; return Result;
} }
}; };
template <precision P> template <typename T, precision P>
struct compute_vec4_shift_left<uint32, P> struct compute_vec4_shift_right<T, P, true>
{ {
static tvec4<uint32, P> call(tvec4<uint32, P> const& a, tvec4<uint32, P> const& b) static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
{ {
tvec4<uint32, P> Result(uninitialize); tvec4<T, P> Result(uninitialize);
Result.data = _mm_sll_epi32(a.data, b.data);
return Result;
}
};
template <precision P>
struct compute_vec4_shift_right<int32, P>
{
static tvec4<int32, P> call(tvec4<int32, P> const& a, tvec4<int32, P> const& b)
{
tvec4<int32, P> Result(uninitialize);
Result.data = _mm_srl_epi32(a.data, b.data); Result.data = _mm_srl_epi32(a.data, b.data);
return Result; return Result;
} }
}; };
template <precision P> template <typename T, precision P>
struct compute_vec4_shift_right<uint32, P> struct compute_vec4_logical_not<T, P, true>
{ {
static tvec4<uint32, P> call(tvec4<uint32, P> const& a, tvec4<uint32, P> const& b) static tvec4<T, P> call(tvec4<T, P> const & v)
{ {
tvec4<uint32, P> Result(uninitialize); tvec4<T, P> Result(uninitialize);
Result.data = _mm_srl_epi32(a.data, b.data); Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1));
return Result; return Result;
} }
}; };

View File

@ -1237,6 +1237,12 @@ int main()
{ {
int Error = 0; int Error = 0;
glm::ivec4 const a(1);
glm::ivec4 const b = ~a;
glm::int32 const c(1);
glm::int32 const d = ~c;
Error += sign::test(); Error += sign::test();
Error += floor_::test(); Error += floor_::test();
Error += mod_::test(); Error += mod_::test();