Fixed the build of swizzle operators when SIMD is enabled

This commit is contained in:
Christophe Riccio 2017-08-16 02:42:47 +02:00
parent 4c43e9e679
commit 211881abf9

View File

@ -7,14 +7,14 @@ namespace glm{
namespace detail namespace detail
{ {
# if GLM_SWIZZLE == GLM_SWIZZLE_ENABLED # if GLM_SWIZZLE == GLM_SWIZZLE_ENABLED
template<qualifier P, int E0, int E1, int E2, int E3> template<qualifier Q, int E0, int E1, int E2, int E3>
struct _swizzle_base1<4, float, P, E0,E1,E2,E3, true> : public _swizzle_base0<float, 4> struct _swizzle_base1<4, float, Q, E0,E1,E2,E3, true> : public _swizzle_base0<float, 4>
{ {
GLM_FUNC_QUALIFIER vec<4, float, P> operator ()() const GLM_FUNC_QUALIFIER vec<4, float, Q> operator ()() const
{ {
__m128 data = *reinterpret_cast<__m128 const*>(&this->_buffer); __m128 data = *reinterpret_cast<__m128 const*>(&this->_buffer);
vec<4, float, P> Result; vec<4, float, Q> Result;
# if GLM_ARCH & GLM_ARCH_AVX_BIT # if GLM_ARCH & GLM_ARCH_AVX_BIT
Result.data = _mm_permute_ps(data, _MM_SHUFFLE(E3, E2, E1, E0)); Result.data = _mm_permute_ps(data, _MM_SHUFFLE(E3, E2, E1, E0));
# else # else
@ -24,27 +24,27 @@ namespace detail
} }
}; };
template<qualifier P, int E0, int E1, int E2, int E3> template<qualifier Q, int E0, int E1, int E2, int E3>
struct _swizzle_base1<4, int32, P, E0,E1,E2,E3, true> : public _swizzle_base0<int32, 4> struct _swizzle_base1<4, int32, Q, E0,E1,E2,E3, true> : public _swizzle_base0<int32, 4>
{ {
GLM_FUNC_QUALIFIER vec<4, int32, P> operator ()() const GLM_FUNC_QUALIFIER vec<4, int32, Q> operator ()() const
{ {
__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer); __m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
vec<4, int32, P> Result; vec<4, int32, Q> Result;
Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0)); Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
return Result; return Result;
} }
}; };
template<qualifier P, int E0, int E1, int E2, int E3> template<qualifier Q, int E0, int E1, int E2, int E3>
struct _swizzle_base1<4, uint32, P, E0,E1,E2,E3, true> : public _swizzle_base0<uint32, 4> struct _swizzle_base1<4, uint32, Q, E0,E1,E2,E3, true> : public _swizzle_base0<uint32, 4>
{ {
GLM_FUNC_QUALIFIER vec<4, uint32, P> operator ()() const GLM_FUNC_QUALIFIER vec<4, uint32, Q> operator ()() const
{ {
__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer); __m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
vec<4, uint32, P> Result; vec<4, uint32, Q> Result;
Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0)); Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
return Result; return Result;
} }
@ -52,11 +52,11 @@ namespace detail
# endif// GLM_SWIZZLE == GLM_SWIZZLE_ENABLED # endif// GLM_SWIZZLE == GLM_SWIZZLE_ENABLED
template<qualifier Q> template<qualifier Q>
struct compute_vec4_add<float, P, true> struct compute_vec4_add<float, Q, true>
{ {
static vec<4, float, P> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b) static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
{ {
vec<4, float, P> Result; vec<4, float, Q> Result;
Result.data = _mm_add_ps(a.data, b.data); Result.data = _mm_add_ps(a.data, b.data);
return Result; return Result;
} }
@ -64,11 +64,11 @@ namespace detail
# if GLM_ARCH & GLM_ARCH_AVX_BIT # if GLM_ARCH & GLM_ARCH_AVX_BIT
template<qualifier Q> template<qualifier Q>
struct compute_vec4_add<double, P, true> struct compute_vec4_add<double, Q, true>
{ {
static vec<4, double, P> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b) static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
{ {
vec<4, double, P> Result; vec<4, double, Q> Result;
Result.data = _mm256_add_pd(a.data, b.data); Result.data = _mm256_add_pd(a.data, b.data);
return Result; return Result;
} }
@ -76,11 +76,11 @@ namespace detail
# endif # endif
template<qualifier Q> template<qualifier Q>
struct compute_vec4_sub<float, P, true> struct compute_vec4_sub<float, Q, true>
{ {
static vec<4, float, P> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b) static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
{ {
vec<4, float, P> Result; vec<4, float, Q> Result;
Result.data = _mm_sub_ps(a.data, b.data); Result.data = _mm_sub_ps(a.data, b.data);
return Result; return Result;
} }
@ -88,11 +88,11 @@ namespace detail
# if GLM_ARCH & GLM_ARCH_AVX_BIT # if GLM_ARCH & GLM_ARCH_AVX_BIT
template<qualifier Q> template<qualifier Q>
struct compute_vec4_sub<double, P, true> struct compute_vec4_sub<double, Q, true>
{ {
static vec<4, double, P> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b) static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
{ {
vec<4, double, P> Result; vec<4, double, Q> Result;
Result.data = _mm256_sub_pd(a.data, b.data); Result.data = _mm256_sub_pd(a.data, b.data);
return Result; return Result;
} }
@ -100,11 +100,11 @@ namespace detail
# endif # endif
template<qualifier Q> template<qualifier Q>
struct compute_vec4_mul<float, P, true> struct compute_vec4_mul<float, Q, true>
{ {
static vec<4, float, P> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b) static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
{ {
vec<4, float, P> Result; vec<4, float, Q> Result;
Result.data = _mm_mul_ps(a.data, b.data); Result.data = _mm_mul_ps(a.data, b.data);
return Result; return Result;
} }
@ -112,11 +112,11 @@ namespace detail
# if GLM_ARCH & GLM_ARCH_AVX_BIT # if GLM_ARCH & GLM_ARCH_AVX_BIT
template<qualifier Q> template<qualifier Q>
struct compute_vec4_mul<double, P, true> struct compute_vec4_mul<double, Q, true>
{ {
static vec<4, double, P> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b) static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
{ {
vec<4, double, P> Result; vec<4, double, Q> Result;
Result.data = _mm256_mul_pd(a.data, b.data); Result.data = _mm256_mul_pd(a.data, b.data);
return Result; return Result;
} }
@ -124,11 +124,11 @@ namespace detail
# endif # endif
template<qualifier Q> template<qualifier Q>
struct compute_vec4_div<float, P, true> struct compute_vec4_div<float, Q, true>
{ {
static vec<4, float, P> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b) static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
{ {
vec<4, float, P> Result; vec<4, float, Q> Result;
Result.data = _mm_div_ps(a.data, b.data); Result.data = _mm_div_ps(a.data, b.data);
return Result; return Result;
} }
@ -136,11 +136,11 @@ namespace detail
# if GLM_ARCH & GLM_ARCH_AVX_BIT # if GLM_ARCH & GLM_ARCH_AVX_BIT
template<qualifier Q> template<qualifier Q>
struct compute_vec4_div<double, P, true> struct compute_vec4_div<double, Q, true>
{ {
static vec<4, double, P> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b) static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
{ {
vec<4, double, P> Result; vec<4, double, Q> Result;
Result.data = _mm256_div_pd(a.data, b.data); Result.data = _mm256_div_pd(a.data, b.data);
return Result; return Result;
} }
@ -303,7 +303,7 @@ namespace detail
# endif # endif
template<qualifier Q> template<qualifier Q>
struct compute_vec4_equal<float, P, false, 32, true> struct compute_vec4_equal<float, Q, false, 32, true>
{ {
static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2) static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
{ {
@ -312,7 +312,7 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_vec4_equal<int32, P, true, 32, true> struct compute_vec4_equal<int32, Q, true, 32, true>
{ {
static bool call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2) static bool call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2)
{ {
@ -321,7 +321,7 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_vec4_nequal<float, P, false, 32, true> struct compute_vec4_nequal<float, Q, false, 32, true>
{ {
static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2) static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
{ {
@ -330,7 +330,7 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_vec4_nequal<int32, P, true, 32, true> struct compute_vec4_nequal<int32, Q, true, 32, true>
{ {
static bool call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2) static bool call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2)
{ {