From 9f00ba86cb26637aebdde24401106d729b70acb6 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Sat, 28 May 2016 17:54:37 +0200 Subject: [PATCH] Fixed SIMD code path selection --- glm/detail/func_common_simd.inl | 4 +-- glm/detail/func_exponential_simd.inl | 9 +++++ glm/detail/func_geometric_simd.inl | 3 ++ glm/detail/func_packing_simd.inl | 9 +++++ glm/detail/func_vector_relational_simd.inl | 9 +++++ glm/detail/setup.hpp | 42 ++++++++++++++-------- glm/detail/type_vec4.hpp | 4 +-- glm/detail/type_vec4_simd.inl | 20 +++++------ glm/simd/common.h | 10 +++--- glm/simd/geometric.h | 10 +++--- glm/simd/integer.h | 4 +-- glm/simd/matrix.h | 4 +-- glm/simd/packing.h | 4 +-- glm/simd/trigonometric.h | 4 +-- glm/simd/vector_relational.h | 4 +-- 15 files changed, 92 insertions(+), 48 deletions(-) diff --git a/glm/detail/func_common_simd.inl b/glm/detail/func_common_simd.inl index 9d1106ff..3fa514a4 100644 --- a/glm/detail/func_common_simd.inl +++ b/glm/detail/func_common_simd.inl @@ -1,7 +1,7 @@ /// @ref core /// @file glm/detail/func_common_simd.inl -#if GLM_ARCH & GLM_ARCH_SSE2 +#if GLM_ARCH & GLM_ARCH_SSE2_FLAG #include "../simd/common.h" @@ -135,4 +135,4 @@ namespace detail }//namespace detail }//namespace glm -#endif//GLM_ARCH & GLM_ARCH_SSE2 +#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG diff --git a/glm/detail/func_exponential_simd.inl b/glm/detail/func_exponential_simd.inl index e69de29b..7358c15f 100644 --- a/glm/detail/func_exponential_simd.inl +++ b/glm/detail/func_exponential_simd.inl @@ -0,0 +1,9 @@ +/// @ref core +/// @file glm/detail/func_exponential_simd.inl + +namespace glm{ +namespace detail +{ + +}//namespace detail +}//namespace glm diff --git a/glm/detail/func_geometric_simd.inl b/glm/detail/func_geometric_simd.inl index 21f9615d..78156df3 100644 --- a/glm/detail/func_geometric_simd.inl +++ b/glm/detail/func_geometric_simd.inl @@ -1,5 +1,7 @@ #include "../simd/geometric.h" +#if GLM_ARCH & GLM_ARCH_SSE2_FLAG + namespace glm{ namespace detail { @@ -15,3 +17,4 @@ namespace detail }//namespace detail }//namespace glm +#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG diff --git a/glm/detail/func_packing_simd.inl b/glm/detail/func_packing_simd.inl index e69de29b..1d4a5225 100644 --- a/glm/detail/func_packing_simd.inl +++ b/glm/detail/func_packing_simd.inl @@ -0,0 +1,9 @@ +/// @ref core +/// @file glm/detail/func_packing_simd.inl + +namespace glm{ +namespace detail +{ + +}//namespace detail +}//namespace glm diff --git a/glm/detail/func_vector_relational_simd.inl b/glm/detail/func_vector_relational_simd.inl index e69de29b..faab59b8 100644 --- a/glm/detail/func_vector_relational_simd.inl +++ b/glm/detail/func_vector_relational_simd.inl @@ -0,0 +1,9 @@ +/// @ref core +/// @file glm/detail/func_vector_relational_simd.inl + +namespace glm{ +namespace detail +{ + +}//namespace detail +}//namespace glm diff --git a/glm/detail/setup.hpp b/glm/detail/setup.hpp index b219af9f..0c1fd4bd 100644 --- a/glm/detail/setup.hpp +++ b/glm/detail/setup.hpp @@ -68,20 +68,34 @@ // User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 GLM_FORCE_AVX2 -#define GLM_ARCH_PURE 0x00000000 -#define GLM_ARCH_X86 0x00000001 -#define GLM_ARCH_SSE2 0x00000002 | GLM_ARCH_X86 -#define GLM_ARCH_SSE3 0x00000004 | GLM_ARCH_SSE2 -#define GLM_ARCH_SSSE3 0x00000008 | GLM_ARCH_SSE3 -#define GLM_ARCH_SSE41 0x00000010 | GLM_ARCH_SSSE3 -#define GLM_ARCH_SSE42 0x00000020 | GLM_ARCH_SSE41 -#define GLM_ARCH_AVX 0x00000040 | GLM_ARCH_SSE42 -#define GLM_ARCH_AVX2 0x00000080 | GLM_ARCH_AVX -#define GLM_ARCH_AVX512 0x00000100 | GLM_ARCH_AVX2 // Skylake subset -#define GLM_ARCH_ARM 0x00000100 -#define GLM_ARCH_NEON 0x00000200 | GLM_ARCH_ARM -#define GLM_ARCH_MIPS 0x00010000 -#define GLM_ARCH_PPC 0x01000000 +#define GLM_ARCH_X86_FLAG 0x00000001 +#define GLM_ARCH_SSE2_FLAG 0x00000002 +#define GLM_ARCH_SSE3_FLAG 0x00000004 +#define GLM_ARCH_SSSE3_FLAG 0x00000008 +#define GLM_ARCH_SSE41_FLAG 0x00000010 +#define GLM_ARCH_SSE42_FLAG 0x00000020 +#define GLM_ARCH_AVX_FLAG 0x00000040 +#define GLM_ARCH_AVX2_FLAG 0x00000080 +#define GLM_ARCH_AVX512_FLAG 0x00000100 // Skylake subset +#define GLM_ARCH_ARM_FLAG 0x00000100 +#define GLM_ARCH_NEON_FLAG 0x00000200 +#define GLM_ARCH_MIPS_FLAG 0x00010000 +#define GLM_ARCH_PPC_FLAG 0x01000000 + +#define GLM_ARCH_PURE (0x00000000) +#define GLM_ARCH_X86 (GLM_ARCH_X86_FLAG) +#define GLM_ARCH_SSE2 (GLM_ARCH_SSE2_FLAG | GLM_ARCH_X86) +#define GLM_ARCH_SSE3 (GLM_ARCH_SSE3_FLAG | GLM_ARCH_SSE2) +#define GLM_ARCH_SSSE3 (GLM_ARCH_SSSE3_FLAG | GLM_ARCH_SSE3) +#define GLM_ARCH_SSE41 (GLM_ARCH_SSE41_FLAG | GLM_ARCH_SSSE3) +#define GLM_ARCH_SSE42 (GLM_ARCH_SSE42_FLAG | GLM_ARCH_SSE41) +#define GLM_ARCH_AVX (GLM_ARCH_AVX_FLAG | GLM_ARCH_SSE42) +#define GLM_ARCH_AVX2 (GLM_ARCH_AVX2_FLAG | GLM_ARCH_AVX) +#define GLM_ARCH_AVX512 (GLM_ARCH_AVX512_FLAG | GLM_ARCH_AVX2) // Skylake subset +#define GLM_ARCH_ARM (GLM_ARCH_ARM_FLAG) +#define GLM_ARCH_NEON (GLM_ARCH_NEON_FLAG | GLM_ARCH_ARM) +#define GLM_ARCH_MIPS (GLM_ARCH_MIPS_FLAG) +#define GLM_ARCH_PPC (GLM_ARCH_PPC_FLAG) #if defined(GLM_FORCE_PURE) # define GLM_ARCH GLM_ARCH_PURE diff --git a/glm/detail/type_vec4.hpp b/glm/detail/type_vec4.hpp index 3af57d5f..e80851f4 100644 --- a/glm/detail/type_vec4.hpp +++ b/glm/detail/type_vec4.hpp @@ -49,7 +49,7 @@ namespace detail }; # endif -# if (GLM_ARCH & GLM_ARCH_AVX) +# if (GLM_ARCH & GLM_ARCH_AVX_FLAG) template <> struct simd_data { @@ -57,7 +57,7 @@ namespace detail }; # endif -# if (GLM_ARCH & GLM_ARCH_AVX2) +# if (GLM_ARCH & GLM_ARCH_AVX2_FLAG) template <> struct simd_data { diff --git a/glm/detail/type_vec4_simd.inl b/glm/detail/type_vec4_simd.inl index d1017a7b..953160bc 100644 --- a/glm/detail/type_vec4_simd.inl +++ b/glm/detail/type_vec4_simd.inl @@ -1,7 +1,7 @@ /// @ref core /// @file glm/detail/type_tvec4_simd.inl -#if GLM_ARCH & GLM_ARCH_SSE2 +#if GLM_ARCH & GLM_ARCH_SSE2_FLAG namespace glm{ namespace detail @@ -72,7 +72,7 @@ namespace detail } }; -# if GLM_ARCH & GLM_ARCH_AVX2 +# if GLM_ARCH & GLM_ARCH_AVX2_FLAG template struct compute_vec4_and { @@ -96,7 +96,7 @@ namespace detail } }; -# if GLM_ARCH & GLM_ARCH_AVX2 +# if GLM_ARCH & GLM_ARCH_AVX2_FLAG template struct compute_vec4_or { @@ -120,7 +120,7 @@ namespace detail } }; -# if GLM_ARCH & GLM_ARCH_AVX2 +# if GLM_ARCH & GLM_ARCH_AVX2_FLAG template struct compute_vec4_xor { @@ -144,7 +144,7 @@ namespace detail } }; -# if GLM_ARCH & GLM_ARCH_AVX2 +# if GLM_ARCH & GLM_ARCH_AVX2_FLAG template struct compute_vec4_shift_left { @@ -168,7 +168,7 @@ namespace detail } }; -# if GLM_ARCH & GLM_ARCH_AVX2 +# if GLM_ARCH & GLM_ARCH_AVX2_FLAG template struct compute_vec4_shift_right { @@ -192,7 +192,7 @@ namespace detail } }; -# if GLM_ARCH & GLM_ARCH_AVX2 +# if GLM_ARCH & GLM_ARCH_AVX2_FLAG template struct compute_vec4_bitwise_not { @@ -248,7 +248,7 @@ namespace detail data(_mm_set1_ps(s)) {} -# if GLM_ARCH & GLM_ARCH_AVX +# if GLM_ARCH & GLM_ARCH_AVX_FLAG template <> GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4::tvec4(double s) : data(_mm256_set1_pd(s)) @@ -280,7 +280,7 @@ namespace detail data(_mm_set1_epi32(s)) {} -# if GLM_ARCH & GLM_ARCH_AVX2 +# if GLM_ARCH & GLM_ARCH_AVX2_FLAG template <> GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4::tvec4(int64 s) : data(_mm256_set1_epi64x(s)) @@ -350,4 +350,4 @@ namespace detail */ }//namespace glm -#endif//GLM_ARCH & GLM_ARCH_SSE2 +#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG diff --git a/glm/simd/common.h b/glm/simd/common.h index be712ca1..91e41791 100644 --- a/glm/simd/common.h +++ b/glm/simd/common.h @@ -3,12 +3,12 @@ #pragma once -#if GLM_ARCH & GLM_ARCH_SSE2 +#if GLM_ARCH & GLM_ARCH_SSE2_FLAG //mad GLM_FUNC_QUALIFIER __m128 glm_f32v1_mad(__m128 a, __m128 b, __m128 c) { -# if GLM_ARCH & GLM_ARCH_AVX2 +# if GLM_ARCH & GLM_ARCH_AVX2_FLAG return _mm_fmadd_ss(a, b, c); # else return _mm_add_ss(_mm_mul_ss(a, b), c); @@ -18,7 +18,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v1_mad(__m128 a, __m128 b, __m128 c) //mad GLM_FUNC_QUALIFIER __m128 glm_f32v4_mad(__m128 a, __m128 b, __m128 c) { -# if GLM_ARCH & GLM_ARCH_AVX2 +# if GLM_ARCH & GLM_ARCH_AVX2_FLAG return _mm_fmadd_ps(a, b, c); # else return _mm_add_ps(_mm_mul_ps(a, b), c); @@ -33,7 +33,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_abs(__m128 x) GLM_FUNC_QUALIFIER __m128i glm_i32v4_abs(__m128i x) { -# if GLM_ARCH & GLM_ARCH_SSSE3 +# if GLM_ARCH & GLM_ARCH_SSSE3_FLAG return _mm_sign_epi32(x, x); # else __m128i const sgn0 = _mm_srai_epi32(x, 31); @@ -202,4 +202,4 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_sqrt_wip(__m128 x) return Mul3; } -#endif//GLM_ARCH & GLM_ARCH_SSE2 +#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG diff --git a/glm/simd/geometric.h b/glm/simd/geometric.h index ac984d2d..6c47f69b 100644 --- a/glm/simd/geometric.h +++ b/glm/simd/geometric.h @@ -5,13 +5,13 @@ #include "common.h" -#if GLM_ARCH & GLM_ARCH_SSE2 +#if GLM_ARCH & GLM_ARCH_SSE2_FLAG GLM_FUNC_QUALIFIER __m128 glm_f32v4_dot(__m128 v1, __m128 v2) { -# if GLM_ARCH & GLM_ARCH_AVX +# if GLM_ARCH & GLM_ARCH_AVX_FLAG return _mm_dp_ps(v1, v2, 0xff); -# elif GLM_ARCH & GLM_ARCH_SSE3 +# elif GLM_ARCH & GLM_ARCH_SSE3_FLAG __m128 const Mul0 = _mm_mul_ps(v1, v2); __m128 const Hadd0 = _mm_hadd_ps(Mul0, Mul0); __m128 const Hadd1 = _mm_hadd_ps(Hadd0, Hadd0); @@ -28,9 +28,9 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_dot(__m128 v1, __m128 v2) GLM_FUNC_QUALIFIER __m128 glm_f32v1_dot(__m128 v1, __m128 v2) { -# if GLM_ARCH & GLM_ARCH_AVX +# if GLM_ARCH & GLM_ARCH_AVX_FLAG return _mm_dp_ps(v1, v2, 0xff); -# elif GLM_ARCH & GLM_ARCH_SSE3 +# elif GLM_ARCH & GLM_ARCH_SSE3_FLAG __m128 const mul0 = _mm_mul_ps(v1, v2); __m128 const had0 = _mm_hadd_ps(mul0, mul0); __m128 const had1 = _mm_hadd_ps(had0, had0); diff --git a/glm/simd/integer.h b/glm/simd/integer.h index f645f3f5..aab8b40c 100644 --- a/glm/simd/integer.h +++ b/glm/simd/integer.h @@ -3,7 +3,7 @@ #pragma once -#if GLM_ARCH & GLM_ARCH_SSE2 +#if GLM_ARCH & GLM_ARCH_SSE2_FLAG GLM_FUNC_QUALIFIER __m128i glm_i128_interleave(__m128i x) { @@ -112,4 +112,4 @@ GLM_FUNC_QUALIFIER __m128i glm_i128_interleave2(__m128i x, __m128i y) return Reg1; } -#endif//GLM_ARCH & GLM_ARCH_SSE2 +#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG diff --git a/glm/simd/matrix.h b/glm/simd/matrix.h index fbcda869..6ed30286 100644 --- a/glm/simd/matrix.h +++ b/glm/simd/matrix.h @@ -5,7 +5,7 @@ #include "geometric.h" -#if GLM_ARCH & GLM_ARCH_SSE2 +#if GLM_ARCH & GLM_ARCH_SSE2_FLAG static const __m128 GLM_VAR_USED _m128_rad_ps = _mm_set_ps1(3.141592653589793238462643383279f / 180.f); static const __m128 GLM_VAR_USED _m128_deg_ps = _mm_set_ps1(180.f / 3.141592653589793238462643383279f); @@ -1029,4 +1029,4 @@ GLM_FUNC_QUALIFIER void glm_f32m4_outer(__m128 const & c, __m128 const & r, __m1 out[3] = _mm_mul_ps(c, _mm_shuffle_ps(r, r, _MM_SHUFFLE(3, 3, 3, 3))); } -#endif//GLM_ARCH & GLM_ARCH_SSE2 +#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG diff --git a/glm/simd/packing.h b/glm/simd/packing.h index cd84a261..85c4bbf6 100644 --- a/glm/simd/packing.h +++ b/glm/simd/packing.h @@ -3,6 +3,6 @@ #pragma once -#if GLM_ARCH & GLM_ARCH_SSE2 +#if GLM_ARCH & GLM_ARCH_SSE2_FLAG -#endif//GLM_ARCH & GLM_ARCH_SSE2 +#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG diff --git a/glm/simd/trigonometric.h b/glm/simd/trigonometric.h index 37815145..b20c649a 100644 --- a/glm/simd/trigonometric.h +++ b/glm/simd/trigonometric.h @@ -3,7 +3,7 @@ #pragma once -#if GLM_ARCH & GLM_ARCH_SSE2 +#if GLM_ARCH & GLM_ARCH_SSE2_FLAG -#endif//GLM_ARCH & GLM_ARCH_SSE2 +#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG diff --git a/glm/simd/vector_relational.h b/glm/simd/vector_relational.h index c49d5af8..e16721a5 100644 --- a/glm/simd/vector_relational.h +++ b/glm/simd/vector_relational.h @@ -3,6 +3,6 @@ #pragma once -#if GLM_ARCH & GLM_ARCH_SSE2 +#if GLM_ARCH & GLM_ARCH_SSE2_FLAG -#endif//GLM_ARCH & GLM_ARCH_SSE2 +#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG