- Added workaround for a CUDA compiler bug (#186, #185)

This commit is contained in:
Christophe Riccio 2014-04-01 01:20:03 +02:00
parent e33136538d
commit a5d2a63ef3
4 changed files with 31 additions and 5 deletions

View File

@ -197,12 +197,22 @@ namespace detail
// sqrt
GLM_FUNC_QUALIFIER float sqrt(float x)
{
# ifdef __CUDACC__ // Wordaround for a CUDA compiler bug up to CUDA6
detail::tvec1<float, highp> tmp(detail::compute_sqrt<detail::tvec1, float, highp>::call(x));
return tmp.x;
# else
return detail::compute_sqrt<detail::tvec1, float, highp>::call(x).x;
# endif
}
GLM_FUNC_QUALIFIER double sqrt(double x)
{
# ifdef __CUDACC__ // Wordaround for a CUDA compiler bug up to CUDA6
detail::tvec1<double, highp> tmp(detail::compute_sqrt<detail::tvec1, double, highp>::call(x));
return tmp.x;
# else
return detail::compute_sqrt<detail::tvec1, double, highp>::call(x).x;
# endif
}
template <typename T, precision P, template <typename, precision> class vecType>

View File

@ -43,7 +43,12 @@ namespace detail
{
GLM_FUNC_QUALIFIER static T call(detail::tvec1<T, P> const & x, detail::tvec1<T, P> const & y)
{
# ifdef __CUDACC__ // Wordaround for a CUDA compiler bug up to CUDA6
detail::tvec1<T, P> tmp(x * y);
return tmp.x;
# else
return detail::tvec1<T, P>(x * y).x;
# endif
}
};

View File

@ -27,13 +27,23 @@ namespace glm
template <>
GLM_FUNC_QUALIFIER float fastInverseSqrt<float>(float const & x)
{
# ifdef __CUDACC__ // Wordaround for a CUDA compiler bug up to CUDA6
detail::tvec1<T, P> tmp(detail::compute_inversesqrt<detail::tvec1, float, lowp>::call(detail::tvec1<float, lowp>(x)));
return tmp.x;
# else
return detail::compute_inversesqrt<detail::tvec1, float, lowp>::call(detail::tvec1<float, lowp>(x)).x;
# endif
}
template <>
GLM_FUNC_QUALIFIER double fastInverseSqrt<double>(double const & x)
{
# ifdef __CUDACC__ // Wordaround for a CUDA compiler bug up to CUDA6
detail::tvec1<T, P> tmp(detail::compute_inversesqrt<detail::tvec1, double, lowp>::call(detail::tvec1<double, lowp>(x)));
return tmp.x;
# else
return detail::compute_inversesqrt<detail::tvec1, double, lowp>::call(detail::tvec1<double, lowp>(x)).x;
# endif
}
template <template <class, precision> class vecType, typename T, precision P>

View File

@ -51,6 +51,7 @@ GLM 0.9.5.3: 2014-0X-XX
- Fixed usubBorrow (#171)
- Fixed eulerAngle*** not consistent for right-handed coordinate system (#173)
- Added full tests for eulerAngle*** functions (#173)
- Added workaround for a CUDA compiler bug (#186, #185)
================================================================================
GLM 0.9.5.2: 2014-02-08