SIMD common functions optimizations

This commit is contained in:
Christophe Riccio 2016-05-29 19:14:39 +02:00
parent 2386237528
commit 28cb770d14
3 changed files with 47 additions and 15 deletions

View File

@ -283,6 +283,26 @@ namespace detail
return min(max(x, minVal), maxVal);
}
};
/// Generic implementation of the component-wise step function.
/// Each component of the result is 0 where x < edge and 1 otherwise.
/// Kept as a struct so that it can be specialized per vector type.
template <typename T, precision P, template <typename, precision> class vecType>
struct compute_step_vector
{
	/// @param edge Per-component threshold.
	/// @param x Values compared against the threshold.
	/// @return Vector of zeros and ones selected component by component.
	GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & edge, vecType<T, P> const & x)
	{
		vecType<T, P> const One(1);
		vecType<T, P> const Zero(0);

		// The boolean mix picks Zero where the comparison holds, One elsewhere.
		return mix(One, Zero, glm::lessThan(x, edge));
	}
};
/// Generic implementation of the component-wise smoothstep function.
/// Kept as a struct so that it can be specialized per vector type.
template <typename T, precision P, template <typename, precision> class vecType>
struct compute_smoothstep_vector
{
	/// @param edge0 Per-component lower edge of the transition.
	/// @param edge1 Per-component upper edge of the transition.
	/// @param x Values to interpolate.
	/// @return t * t * (3 - 2 * t), where t is (x - edge0) / (edge1 - edge0) clamped to [0, 1].
	GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & edge0, vecType<T, P> const & edge1, vecType<T, P> const & x)
	{
		// The diagnostic must name the function the user actually called.
		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'smoothstep' only accept floating-point inputs");

		// Normalized position of x between the two edges, clamped to [0, 1].
		vecType<T, P> const tmp(clamp((x - edge0) / (edge1 - edge0), static_cast<T>(0), static_cast<T>(1)));

		// Cubic Hermite polynomial: 3t^2 - 2t^3.
		return tmp * tmp * (static_cast<T>(3) - static_cast<T>(2) * tmp);
	}
};
}//namespace detail
template <typename genFIType>
@ -561,15 +581,13 @@ namespace detail
/// Component-wise step with a scalar edge.
/// The scalar edge is broadcast to a vector and the work is delegated to
/// detail::compute_step_vector, so that any specialization of that helper
/// is picked up here.
/// @param edge Threshold shared by all components.
/// @param x Values compared against the threshold.
/// @return 0 for components where x < edge, 1 otherwise.
template <template <typename, precision> class vecType, typename T, precision P>
GLM_FUNC_QUALIFIER vecType<T, P> step(T edge, vecType<T, P> const & x)
{
	GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'step' only accept floating-point inputs");

	// Single exit point: the former inline mix() duplicated the helper and
	// left this dispatch unreachable.
	return detail::compute_step_vector<T, P, vecType>::call(vecType<T, P>(edge), x);
}
/// Component-wise step with a per-component edge.
/// The work is delegated to detail::compute_step_vector, so that any
/// specialization of that helper is picked up here.
/// @param edge Per-component threshold.
/// @param x Values compared against the threshold.
/// @return 0 for components where x < edge, 1 otherwise.
template <template <typename, precision> class vecType, typename T, precision P>
GLM_FUNC_QUALIFIER vecType<T, P> step(vecType<T, P> const & edge, vecType<T, P> const & x)
{
	// Single exit point: the former inline mix() duplicated the helper and
	// left this dispatch unreachable.
	return detail::compute_step_vector<T, P, vecType>::call(edge, x);
}
// smoothstep
@ -585,19 +603,13 @@ namespace detail
/// Component-wise smoothstep with scalar edges.
/// Both edges are broadcast to vectors and the work is delegated to
/// detail::compute_smoothstep_vector, so that specializations of that helper
/// are picked up here.
/// @param edge0 Lower edge shared by all components.
/// @param edge1 Upper edge shared by all components.
/// @param x Values to interpolate.
/// @return Smooth interpolation between 0 and 1 for each component of x.
template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_QUALIFIER vecType<T, P> smoothstep(T edge0, T edge1, vecType<T, P> const & x)
{
	GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'smoothstep' only accept floating-point inputs");

	// Single exit point: the former inline polynomial duplicated the helper
	// and left this dispatch unreachable.
	return detail::compute_smoothstep_vector<T, P, vecType>::call(vecType<T, P>(edge0), vecType<T, P>(edge1), x);
}
/// Component-wise smoothstep with per-component edges.
/// The work is delegated to detail::compute_smoothstep_vector, so that
/// specializations of that helper are picked up here.
/// @param edge0 Per-component lower edge of the transition.
/// @param edge1 Per-component upper edge of the transition.
/// @param x Values to interpolate.
/// @return Smooth interpolation between 0 and 1 for each component of x.
template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_QUALIFIER vecType<T, P> smoothstep(vecType<T, P> const & edge0, vecType<T, P> const & edge1, vecType<T, P> const & x)
{
	GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'smoothstep' only accept floating-point inputs");

	// Single exit point: the former inline polynomial duplicated the helper
	// and left this dispatch unreachable.
	return detail::compute_smoothstep_vector<T, P, vecType>::call(edge0, edge1, x);
}
# if GLM_HAS_CXX11_STL

View File

@ -203,8 +203,28 @@ namespace detail
return Result;
}
};
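/* FIXME: this tvec4<float> specialization of compute_step_vector (built on glm_f32v4_stp) is disabled, so step() falls back to the generic compute_step_vector.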
template <precision P>
struct compute_step_vector<float, P, tvec4>
{
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& edge, tvec4<float, P> const& x)
{
tvec4<float, P> result(uninitialize);
result.data = glm_f32v4_stp(edge.data, x.data);
return result;
}
};
*/
/// Specialization of compute_smoothstep_vector for tvec4<float>.
/// Forwards the `data` members of the three operands to glm_f32v4_ssp.
template <precision P>
struct compute_smoothstep_vector<float, P, tvec4>
{
	/// @param edge0 Per-component lower edge of the transition.
	/// @param edge1 Per-component upper edge of the transition.
	/// @param x Values to interpolate.
	/// @return Vector whose `data` member holds the value returned by glm_f32v4_ssp.
	GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& edge0, tvec4<float, P> const& edge1, tvec4<float, P> const& x)
	{
		// Built with the `uninitialize` tag; `data` is overwritten on the next line.
		tvec4<float, P> Smoothed(uninitialize);
		Smoothed.data = glm_f32v4_ssp(edge0.data, edge1.data, x.data);
		return Smoothed;
	}
};
}//namespace detail
}//namespace glm

View File

@ -555,8 +555,8 @@ namespace step_
// Test vectors for step() with a scalar edge and a vec4 input.
// NOTE(review): fields look like { edge, x, expected result } -- confirm against the `entry` definition.
entry<float, glm::vec4> TestVec4Scalar [] =
{
// Edge 0 lies below every component of x: all ones expected.
{ 0.0f, glm::vec4(1.0f, 2.0f, 3.0f, 4.0f), glm::vec4(1.0f) },
// Edge 1 equals the first component of x; x is not less than the edge, so all ones expected.
{ 1.0f, glm::vec4(1.0f, 2.0f, 3.0f, 4.0f), glm::vec4(1.0f) },
// Same values as the first entry.
{ 0.0f, glm::vec4(1.0f, 2.0f, 3.0f, 4.0f), glm::vec4(1.0f) },
// Every component of x lies below edge 0: all zeros expected.
{ 0.0f, glm::vec4(-1.0f, -2.0f, -3.0f, -4.0f), glm::vec4(0.0f) }
};