Fixed swizzle operators build using SIMD
This commit is contained in:
parent
4c43e9e679
commit
211881abf9
@ -7,14 +7,14 @@ namespace glm{
|
|||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
# if GLM_SWIZZLE == GLM_SWIZZLE_ENABLED
|
# if GLM_SWIZZLE == GLM_SWIZZLE_ENABLED
|
||||||
template<qualifier P, int E0, int E1, int E2, int E3>
|
template<qualifier Q, int E0, int E1, int E2, int E3>
|
||||||
struct _swizzle_base1<4, float, P, E0,E1,E2,E3, true> : public _swizzle_base0<float, 4>
|
struct _swizzle_base1<4, float, Q, E0,E1,E2,E3, true> : public _swizzle_base0<float, 4>
|
||||||
{
|
{
|
||||||
GLM_FUNC_QUALIFIER vec<4, float, P> operator ()() const
|
GLM_FUNC_QUALIFIER vec<4, float, Q> operator ()() const
|
||||||
{
|
{
|
||||||
__m128 data = *reinterpret_cast<__m128 const*>(&this->_buffer);
|
__m128 data = *reinterpret_cast<__m128 const*>(&this->_buffer);
|
||||||
|
|
||||||
vec<4, float, P> Result;
|
vec<4, float, Q> Result;
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||||
Result.data = _mm_permute_ps(data, _MM_SHUFFLE(E3, E2, E1, E0));
|
Result.data = _mm_permute_ps(data, _MM_SHUFFLE(E3, E2, E1, E0));
|
||||||
# else
|
# else
|
||||||
@ -24,27 +24,27 @@ namespace detail
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<qualifier P, int E0, int E1, int E2, int E3>
|
template<qualifier Q, int E0, int E1, int E2, int E3>
|
||||||
struct _swizzle_base1<4, int32, P, E0,E1,E2,E3, true> : public _swizzle_base0<int32, 4>
|
struct _swizzle_base1<4, int32, Q, E0,E1,E2,E3, true> : public _swizzle_base0<int32, 4>
|
||||||
{
|
{
|
||||||
GLM_FUNC_QUALIFIER vec<4, int32, P> operator ()() const
|
GLM_FUNC_QUALIFIER vec<4, int32, Q> operator ()() const
|
||||||
{
|
{
|
||||||
__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
|
__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
|
||||||
|
|
||||||
vec<4, int32, P> Result;
|
vec<4, int32, Q> Result;
|
||||||
Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
|
Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<qualifier P, int E0, int E1, int E2, int E3>
|
template<qualifier Q, int E0, int E1, int E2, int E3>
|
||||||
struct _swizzle_base1<4, uint32, P, E0,E1,E2,E3, true> : public _swizzle_base0<uint32, 4>
|
struct _swizzle_base1<4, uint32, Q, E0,E1,E2,E3, true> : public _swizzle_base0<uint32, 4>
|
||||||
{
|
{
|
||||||
GLM_FUNC_QUALIFIER vec<4, uint32, P> operator ()() const
|
GLM_FUNC_QUALIFIER vec<4, uint32, Q> operator ()() const
|
||||||
{
|
{
|
||||||
__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
|
__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
|
||||||
|
|
||||||
vec<4, uint32, P> Result;
|
vec<4, uint32, Q> Result;
|
||||||
Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
|
Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
@ -52,11 +52,11 @@ namespace detail
|
|||||||
# endif// GLM_SWIZZLE == GLM_SWIZZLE_ENABLED
|
# endif// GLM_SWIZZLE == GLM_SWIZZLE_ENABLED
|
||||||
|
|
||||||
template<qualifier Q>
|
template<qualifier Q>
|
||||||
struct compute_vec4_add<float, P, true>
|
struct compute_vec4_add<float, Q, true>
|
||||||
{
|
{
|
||||||
static vec<4, float, P> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||||
{
|
{
|
||||||
vec<4, float, P> Result;
|
vec<4, float, Q> Result;
|
||||||
Result.data = _mm_add_ps(a.data, b.data);
|
Result.data = _mm_add_ps(a.data, b.data);
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
@ -64,11 +64,11 @@ namespace detail
|
|||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||||
template<qualifier Q>
|
template<qualifier Q>
|
||||||
struct compute_vec4_add<double, P, true>
|
struct compute_vec4_add<double, Q, true>
|
||||||
{
|
{
|
||||||
static vec<4, double, P> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
||||||
{
|
{
|
||||||
vec<4, double, P> Result;
|
vec<4, double, Q> Result;
|
||||||
Result.data = _mm256_add_pd(a.data, b.data);
|
Result.data = _mm256_add_pd(a.data, b.data);
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
@ -76,11 +76,11 @@ namespace detail
|
|||||||
# endif
|
# endif
|
||||||
|
|
||||||
template<qualifier Q>
|
template<qualifier Q>
|
||||||
struct compute_vec4_sub<float, P, true>
|
struct compute_vec4_sub<float, Q, true>
|
||||||
{
|
{
|
||||||
static vec<4, float, P> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||||
{
|
{
|
||||||
vec<4, float, P> Result;
|
vec<4, float, Q> Result;
|
||||||
Result.data = _mm_sub_ps(a.data, b.data);
|
Result.data = _mm_sub_ps(a.data, b.data);
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
@ -88,11 +88,11 @@ namespace detail
|
|||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||||
template<qualifier Q>
|
template<qualifier Q>
|
||||||
struct compute_vec4_sub<double, P, true>
|
struct compute_vec4_sub<double, Q, true>
|
||||||
{
|
{
|
||||||
static vec<4, double, P> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
||||||
{
|
{
|
||||||
vec<4, double, P> Result;
|
vec<4, double, Q> Result;
|
||||||
Result.data = _mm256_sub_pd(a.data, b.data);
|
Result.data = _mm256_sub_pd(a.data, b.data);
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
@ -100,11 +100,11 @@ namespace detail
|
|||||||
# endif
|
# endif
|
||||||
|
|
||||||
template<qualifier Q>
|
template<qualifier Q>
|
||||||
struct compute_vec4_mul<float, P, true>
|
struct compute_vec4_mul<float, Q, true>
|
||||||
{
|
{
|
||||||
static vec<4, float, P> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||||
{
|
{
|
||||||
vec<4, float, P> Result;
|
vec<4, float, Q> Result;
|
||||||
Result.data = _mm_mul_ps(a.data, b.data);
|
Result.data = _mm_mul_ps(a.data, b.data);
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
@ -112,11 +112,11 @@ namespace detail
|
|||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||||
template<qualifier Q>
|
template<qualifier Q>
|
||||||
struct compute_vec4_mul<double, P, true>
|
struct compute_vec4_mul<double, Q, true>
|
||||||
{
|
{
|
||||||
static vec<4, double, P> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
||||||
{
|
{
|
||||||
vec<4, double, P> Result;
|
vec<4, double, Q> Result;
|
||||||
Result.data = _mm256_mul_pd(a.data, b.data);
|
Result.data = _mm256_mul_pd(a.data, b.data);
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
@ -124,11 +124,11 @@ namespace detail
|
|||||||
# endif
|
# endif
|
||||||
|
|
||||||
template<qualifier Q>
|
template<qualifier Q>
|
||||||
struct compute_vec4_div<float, P, true>
|
struct compute_vec4_div<float, Q, true>
|
||||||
{
|
{
|
||||||
static vec<4, float, P> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||||
{
|
{
|
||||||
vec<4, float, P> Result;
|
vec<4, float, Q> Result;
|
||||||
Result.data = _mm_div_ps(a.data, b.data);
|
Result.data = _mm_div_ps(a.data, b.data);
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
@ -136,11 +136,11 @@ namespace detail
|
|||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||||
template<qualifier Q>
|
template<qualifier Q>
|
||||||
struct compute_vec4_div<double, P, true>
|
struct compute_vec4_div<double, Q, true>
|
||||||
{
|
{
|
||||||
static vec<4, double, P> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
||||||
{
|
{
|
||||||
vec<4, double, P> Result;
|
vec<4, double, Q> Result;
|
||||||
Result.data = _mm256_div_pd(a.data, b.data);
|
Result.data = _mm256_div_pd(a.data, b.data);
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
@ -303,7 +303,7 @@ namespace detail
|
|||||||
# endif
|
# endif
|
||||||
|
|
||||||
template<qualifier Q>
|
template<qualifier Q>
|
||||||
struct compute_vec4_equal<float, P, false, 32, true>
|
struct compute_vec4_equal<float, Q, false, 32, true>
|
||||||
{
|
{
|
||||||
static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
|
static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
|
||||||
{
|
{
|
||||||
@ -312,7 +312,7 @@ namespace detail
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<qualifier Q>
|
template<qualifier Q>
|
||||||
struct compute_vec4_equal<int32, P, true, 32, true>
|
struct compute_vec4_equal<int32, Q, true, 32, true>
|
||||||
{
|
{
|
||||||
static bool call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2)
|
static bool call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2)
|
||||||
{
|
{
|
||||||
@ -321,7 +321,7 @@ namespace detail
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<qualifier Q>
|
template<qualifier Q>
|
||||||
struct compute_vec4_nequal<float, P, false, 32, true>
|
struct compute_vec4_nequal<float, Q, false, 32, true>
|
||||||
{
|
{
|
||||||
static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
|
static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
|
||||||
{
|
{
|
||||||
@ -330,7 +330,7 @@ namespace detail
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<qualifier Q>
|
template<qualifier Q>
|
||||||
struct compute_vec4_nequal<int32, P, true, 32, true>
|
struct compute_vec4_nequal<int32, Q, true, 32, true>
|
||||||
{
|
{
|
||||||
static bool call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2)
|
static bool call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user