diff --git a/glm/detail/func_common_simd.inl b/glm/detail/func_common_simd.inl
index d44ff2db..4f13faf2 100644
--- a/glm/detail/func_common_simd.inl
+++ b/glm/detail/func_common_simd.inl
@@ -16,7 +16,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
 			tvec4<float, P> result(uninitialize);
-			result.data = glm_f32v4_abs(v.data);
+			result.data = glm_vec4_abs(v.data);
 			return result;
 		}
 	};
@@ -27,7 +27,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & v)
 		{
 			tvec4<int32, P> result(uninitialize);
-			result.data = glm_i32v4_abs(v.data);
+			result.data = glm_ivec4_abs(v.data);
 			return result;
 		}
 	};
@@ -38,7 +38,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
 			tvec4<float, P> result(uninitialize);
-			result.data = glm_f32v4_flr(v.data);
+			result.data = glm_vec4_floor(v.data);
 			return result;
 		}
 	};
@@ -49,7 +49,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
 			tvec4<float, P> result(uninitialize);
-			result.data = glm_f32v4_ceil(v.data);
+			result.data = glm_vec4_ceil(v.data);
 			return result;
 		}
 	};
@@ -60,7 +60,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
 			tvec4<float, P> result(uninitialize);
-			result.data = glm_f32v4_frc(v.data);
+			result.data = glm_vec4_fract(v.data);
 			return result;
 		}
 	};
@@ -71,7 +71,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
 			tvec4<float, P> result(uninitialize);
-			result.data = glm_f32v4_rnd(v.data);
+			result.data = glm_vec4_round(v.data);
 			return result;
 		}
 	};
@@ -82,7 +82,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & y)
 		{
 			tvec4<float, P> result(uninitialize);
-			result.data = glm_f32v4_mod(x.data, y.data);
+			result.data = glm_vec4_mod(x.data, y.data);
 			return result;
 		}
 	};
diff --git a/glm/detail/func_exponential_simd.inl b/glm/detail/func_exponential_simd.inl
index 9f593dea..ac6cab10 100644
--- a/glm/detail/func_exponential_simd.inl
+++ b/glm/detail/func_exponential_simd.inl
@@ -25,7 +25,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static tvec4<float, lowp> call(tvec4<float, lowp> const & v)
 		{
 			tvec4<float, lowp> result(uninitialize);
-			result.data = glm_f32v4_sqrt_lowp(v.data);
+			result.data = glm_vec4_sqrt_lowp(v.data);
 			return result;
 		}
 	};
diff --git a/glm/detail/func_geometric.inl b/glm/detail/func_geometric.inl
index ddf2a9a0..4fd7654b 100644
--- a/glm/detail/func_geometric.inl
+++ b/glm/detail/func_geometric.inl
@@ -114,6 +114,17 @@ namespace detail
 			return I - N * dot(N, I) * static_cast<T>(2);
 		}
 	};
+
+	template <typename T, precision P, template <typename, precision> class vecType>
+	struct compute_refract
+	{
+		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & I, vecType<T, P> const & N, T eta)
+		{
+			T const dotValue(dot(N, I));
+			T const k(static_cast<T>(1) - eta * eta * (static_cast<T>(1) - dotValue * dotValue));
+			return (eta * I - (eta * dotValue + std::sqrt(k)) * N) * static_cast<T>(k >= static_cast<T>(0));
+		}
+	};
 }//namespace detail
 
 	// length
@@ -215,10 +226,9 @@
 
 	// refract
 	template <typename genType>
-	GLM_FUNC_QUALIFIER genType refract(genType const & I, genType const & N, genType const & eta)
+	GLM_FUNC_QUALIFIER genType refract(genType const & I, genType const & N, genType eta)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<genType>::is_iec559, "'refract' accepts only floating-point inputs");
-
 		genType const dotValue(dot(N, I));
 		genType const k(static_cast<genType>(1) - eta * eta * (static_cast<genType>(1) - dotValue * dotValue));
 		return (eta * I - (eta * dotValue + sqrt(k)) * N) * static_cast<genType>(k >= static_cast<genType>(0));
@@ -228,10 +238,7 @@
 	GLM_FUNC_QUALIFIER vecType<T, P> refract(vecType<T, P> const & I, vecType<T, P> const & N, T eta)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'refract' accepts only floating-point inputs");
-
-		T const dotValue(dot(N, I));
-		T const k(static_cast<T>(1) - eta * eta * (static_cast<T>(1) - dotValue * dotValue));
-		return (eta * I - (eta * dotValue + std::sqrt(k)) * N) * static_cast<T>(k >= static_cast<T>(0));
+		return detail::compute_refract<T, P, vecType>::call(I, N, eta);
 	}
 
 }//namespace glm
diff --git a/glm/detail/func_geometric_simd.inl b/glm/detail/func_geometric_simd.inl
index c99276f9..9e126812 100644
--- a/glm/detail/func_geometric_simd.inl
+++ b/glm/detail/func_geometric_simd.inl
@@ -13,7 +13,7 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER static float call(tvec4<float, P> const & v)
 		{
-			return _mm_cvtss_f32(glm_f32v4_len(v.data));
+			return _mm_cvtss_f32(glm_vec4_length(v.data));
 		}
 	};
 
@@ -22,7 +22,7 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER static float call(tvec4<float, P> const & p0, tvec4<float, P> const & p1)
 		{
-			return _mm_cvtss_f32(glm_f32v4_dst(p0.data, p1.data));
+			return _mm_cvtss_f32(glm_vec4_distance(p0.data, p1.data));
 		}
 	};
 
@@ -31,7 +31,7 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER static float call(tvec4<float, P> const& x, tvec4<float, P> const& y)
 		{
-			return _mm_cvtss_f32(glm_f32v1_dot(x.data, y.data));
+			return _mm_cvtss_f32(glm_vec1_dot(x.data, y.data));
 		}
 	};
 
@@ -42,11 +42,10 @@ namespace detail
 		{
 			__m128 const set0 = _mm_set_ps(0.0f, a.z, a.y, a.x);
 			__m128 const set1 = _mm_set_ps(0.0f, b.z, b.y, b.x);
-			__m128 const xpd0 = glm_f32v4_xpd(set0, set1);
+			__m128 const xpd0 = glm_vec4_cross(set0, set1);
 
 			tvec4<float, P> result(uninitialize);
 			result.data = xpd0;
-
 			return tvec3<float, P>(result);
 		}
 	};
@@ -57,7 +56,7 @@ namespace detail
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
 			tvec4<float, P> result(uninitialize);
-			result.data = glm_f32v4_nrm(v.data);
+			result.data = glm_vec4_normalize(v.data);
 			return result;
 		}
 	};
@@ -65,10 +64,10 @@ namespace detail
 	template <precision P>
 	struct compute_faceforward<float, P, tvec4>
 	{
-		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & N, tvec4<float, P> const & I, tvec4<float, P> const & Nref)
+		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& N, tvec4<float, P> const& I, tvec4<float, P> const& Nref)
 		{
 			tvec4<float, P> result(uninitialize);
-			result.data = glm_f32v4_ffd(N.data. I.data, Nref.data);
+			result.data = glm_vec4_faceforward(N.data. I.data, Nref.data);
 			return result;
 		}
 	};
@@ -76,10 +75,21 @@ namespace detail
 	template <precision P>
 	struct compute_reflect<float, P, tvec4>
 	{
-		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & I, tvec4<float, P> const & N)
+		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& I, tvec4<float, P> const& N)
 		{
 			tvec4<float, P> result(uninitialize);
-			result.data = glm_f32v4_rfe(I.data, N.data);
+			result.data = glm_vec4_reflect(I.data, N.data);
+			return result;
+		}
+	};
+
+	template <precision P>
+	struct compute_refract<float, P, tvec4>
+	{
+		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& I, tvec4<float, P> const& N, float eta)
+		{
+			tvec4<float, P> result(uninitialize);
+			result.data = glm_vec4_refract(I.data, N.data, _mm_set1_ps(eta));
 			return result;
 		}
 	};
diff --git a/glm/simd/common.h b/glm/simd/common.h
index 4f4e877d..fbcbeef0 100644
--- a/glm/simd/common.h
+++ b/glm/simd/common.h
@@ -7,8 +7,10 @@
 
 #if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
-//mad
-GLM_FUNC_QUALIFIER __m128 glm_f32v1_mad(__m128 a, __m128 b, __m128 c)
+typedef __m128 glm_vec4;
+typedef __m128i glm_ivec4;
+
+GLM_FUNC_QUALIFIER __m128 glm_vec1_fma(__m128 a, __m128 b, __m128 c)
 {
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 		return _mm_fmadd_ss(a, b, c);
@@ -17,8 +19,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v1_mad(__m128 a, __m128 b, __m128 c)
 #	endif
 }
 
-//mad
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_mad(__m128 a, __m128 b, __m128 c)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_fma(__m128 a, __m128 b, __m128 c)
 {
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 		return _mm_fmadd_ps(a, b, c);
@@ -27,13 +28,12 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_mad(__m128 a, __m128 b, __m128 c)
 #	endif
 }
 
-//abs
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_abs(__m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_abs(__m128 x)
 {
 	return _mm_and_ps(x, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)));
 }
 
-GLM_FUNC_QUALIFIER __m128i glm_i32v4_abs(__m128i x)
+GLM_FUNC_QUALIFIER __m128i glm_ivec4_abs(__m128i x)
 {
 #	if GLM_ARCH & GLM_ARCH_SSSE3_BIT
 		return _mm_sign_epi32(x, x);
@@ -45,8 +45,7 @@ GLM_FUNC_QUALIFIER __m128i glm_i32v4_abs(__m128i x)
 #	endif
 }
 
-//sign
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_sgn(__m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_sign(__m128 x)
 {
 	__m128 const zro0 = _mm_setzero_ps();
 	__m128 const cmp0 = _mm_cmplt_ps(x, zro0);
@@ -57,8 +56,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_sgn(__m128 x)
 	return or0;
 }
 
-//round
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_rnd(__m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_round(__m128 x)
 {
 #	if GLM_ARCH & GLM_ARCH_SSE41_BIT
 		return _mm_round_ps(x, _MM_FROUND_TO_NEAREST_INT);
@@ -72,13 +70,12 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_rnd(__m128 x)
 #	endif
 }
 
-//floor
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_flr(__m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_floor(__m128 x)
 {
 #	if GLM_ARCH & GLM_ARCH_SSE41_BIT
 		return _mm_floor_ps(x);
 #	else
-		__m128 const rnd0 = glm_f32v4_rnd(x);
+		__m128 const rnd0 = glm_vec4_round(x);
 		__m128 const cmp0 = _mm_cmplt_ps(x, rnd0);
 		__m128 const and0 = _mm_and_ps(cmp0, _mm_set1_ps(1.0f));
 		__m128 const sub0 = _mm_sub_ps(rnd0, and0);
@@ -87,14 +84,14 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_flr(__m128 x)
 }
 
 /* trunc TODO
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_trc(__m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_trunc(__m128 x)
 {
 	return __m128();
 }
 */
 
 //roundEven
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_rde(__m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_roundEven(__m128 x)
 {
 	__m128 const sgn0 = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
 	__m128 const and0 = _mm_and_ps(sgn0, x);
@@ -104,12 +101,12 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_rde(__m128 x)
 	return sub0;
 }
 
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_ceil(__m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_ceil(__m128 x)
 {
 #	if GLM_ARCH & GLM_ARCH_SSE41_BIT
 		return _mm_ceil_ps(x);
 #	else
-		__m128 const rnd0 = glm_f32v4_rnd(x);
+		__m128 const rnd0 = glm_vec4_round(x);
 		__m128 const cmp0 = _mm_cmpgt_ps(x, rnd0);
 		__m128 const and0 = _mm_and_ps(cmp0, _mm_set1_ps(1.0f));
 		__m128 const add0 = _mm_add_ps(rnd0, and0);
@@ -117,51 +114,51 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_ceil(__m128 x)
 #	endif
 }
 
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_frc(__m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_fract(__m128 x)
 {
-	__m128 const flr0 = glm_f32v4_flr(x);
+	__m128 const flr0 = glm_vec4_floor(x);
 	__m128 const sub0 = _mm_sub_ps(x, flr0);
 	return sub0;
 }
 
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_mod(__m128 x, __m128 y)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_mod(__m128 x, __m128 y)
 {
 	__m128 const div0 = _mm_div_ps(x, y);
-	__m128 const flr0 = glm_f32v4_flr(div0);
+	__m128 const flr0 = glm_vec4_floor(div0);
 	__m128 const mul0 = _mm_mul_ps(y, flr0);
 	__m128 const sub0 = _mm_sub_ps(x, mul0);
 	return sub0;
 }
 
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_clp(__m128 v, __m128 minVal, __m128 maxVal)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_clamp(__m128 v, __m128 minVal, __m128 maxVal)
 {
 	__m128 const min0 = _mm_min_ps(v, maxVal);
 	__m128 const max0 = _mm_max_ps(min0, minVal);
 	return max0;
 }
 
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_mix(__m128 v1, __m128 v2, __m128 a)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_mix(__m128 v1, __m128 v2, __m128 a)
 {
 	__m128 const sub0 = _mm_sub_ps(_mm_set1_ps(1.0f), a);
 	__m128 const mul0 = _mm_mul_ps(v1, sub0);
-	__m128 const mad0 = glm_f32v4_mad(v2, a, mul0);
+	__m128 const mad0 = glm_vec4_fma(v2, a, mul0);
 	return mad0;
 }
 
 //step
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_stp(__m128 edge, __m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_step(__m128 edge, __m128 x)
 {
 	__m128 const cmp = _mm_cmple_ps(x, edge);
 	return _mm_movemask_ps(cmp) == 0 ? _mm_set1_ps(1.0f) : _mm_setzero_ps();
 }
 
 // smoothstep
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_ssp(__m128 edge0, __m128 edge1, __m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_smoothstep(__m128 edge0, __m128 edge1, __m128 x)
 {
 	__m128 const sub0 = _mm_sub_ps(x, edge0);
 	__m128 const sub1 = _mm_sub_ps(edge1, edge0);
 	__m128 const div0 = _mm_sub_ps(sub0, sub1);
-	__m128 const clp0 = glm_f32v4_clp(div0, _mm_setzero_ps(), _mm_set1_ps(1.0f));
+	__m128 const clp0 = glm_vec4_clamp(div0, _mm_setzero_ps(), _mm_set1_ps(1.0f));
 	__m128 const mul0 = _mm_mul_ps(_mm_set1_ps(2.0f), clp0);
 	__m128 const sub2 = _mm_sub_ps(_mm_set1_ps(3.0f), mul0);
 	__m128 const mul1 = _mm_mul_ps(clp0, clp0);
@@ -170,7 +167,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_ssp(__m128 edge0, __m128 edge1, __m128 x)
 }
 
 // Agner Fog method
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_nan(__m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_nan(__m128 x)
 {
 	__m128i const t1 = _mm_castps_si128(x);						// reinterpret as 32-bit integer
 	__m128i const t2 = _mm_sll_epi32(t1, _mm_cvtsi32_si128(1));	// shift out sign bit
@@ -184,7 +181,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_nan(__m128 x)
 }
 
 // Agner Fog method
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_inf(__m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_inf(__m128 x)
 {
 	__m128i const t1 = _mm_castps_si128(x);						// reinterpret as 32-bit integer
 	__m128i const t2 = _mm_sll_epi32(t1, _mm_cvtsi32_si128(1));	// shift out sign bit
diff --git a/glm/simd/exponential.h b/glm/simd/exponential.h
index bc7f8cf3..17216a91 100644
--- a/glm/simd/exponential.h
+++ b/glm/simd/exponential.h
@@ -5,16 +5,12 @@
 
 #if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
-// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
-// By Elan Ruskin, http://assemblyrequired.crashworks.org/
-GLM_FUNC_QUALIFIER __m128 glm_f32v1_sqrt_lowp(__m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec1_sqrt_lowp(__m128 x)
 {
 	return _mm_mul_ss(_mm_rsqrt_ss(x), x);
 }
 
-// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
-// By Elan Ruskin, http://assemblyrequired.crashworks.org/
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_sqrt_lowp(__m128 x)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_sqrt_lowp(__m128 x)
 {
 	return _mm_mul_ps(_mm_rsqrt_ps(x), x);
 }
diff --git a/glm/simd/geometric.h b/glm/simd/geometric.h
index 343bf2bc..41469999 100644
--- a/glm/simd/geometric.h
+++ b/glm/simd/geometric.h
@@ -7,7 +7,24 @@
 
 #if GLM_ARCH & GLM_ARCH_SSE2_BIT
 
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_dot(__m128 v1, __m128 v2)
+GLM_FUNC_DECL __m128 glm_vec4_dot(__m128 v1, __m128 v2);
+GLM_FUNC_DECL __m128 glm_vec1_dot(__m128 v1, __m128 v2);
+
+GLM_FUNC_QUALIFIER __m128 glm_vec4_length(__m128 x)
+{
+	__m128 const dot0 = glm_vec4_dot(x, x);
+	__m128 const sqt0 = _mm_sqrt_ps(dot0);
+	return sqt0;
+}
+
+GLM_FUNC_QUALIFIER __m128 glm_vec4_distance(__m128 p0, __m128 p1)
+{
+	__m128 const sub0 = _mm_sub_ps(p0, p1);
+	__m128 const len0 = glm_vec4_length(sub0);
+	return len0;
+}
+
+GLM_FUNC_QUALIFIER __m128 glm_vec4_dot(__m128 v1, __m128 v2)
 {
 #	if GLM_ARCH & GLM_ARCH_AVX_BIT
 		return _mm_dp_ps(v1, v2, 0xff);
@@ -26,7 +43,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_dot(__m128 v1, __m128 v2)
 #	endif
 }
 
-GLM_FUNC_QUALIFIER __m128 glm_f32v1_dot(__m128 v1, __m128 v2)
+GLM_FUNC_QUALIFIER __m128 glm_vec1_dot(__m128 v1, __m128 v2)
 {
 #	if GLM_ARCH & GLM_ARCH_AVX_BIT
 		return _mm_dp_ps(v1, v2, 0xff);
@@ -45,21 +62,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v1_dot(__m128 v1, __m128 v2)
 #	endif
 }
 
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_len(__m128 x)
-{
-	__m128 const dot0 = glm_f32v4_dot(x, x);
-	__m128 const sqt0 = _mm_sqrt_ps(dot0);
-	return sqt0;
-}
-
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_dst(__m128 p0, __m128 p1)
-{
-	__m128 const sub0 = _mm_sub_ps(p0, p1);
-	__m128 const len0 = glm_f32v4_len(sub0);
-	return len0;
-}
-
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_xpd(__m128 v1, __m128 v2)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_cross(__m128 v1, __m128 v2)
 {
 	__m128 const swp0 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
 	__m128 const swp1 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
@@ -71,35 +74,35 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_xpd(__m128 v1, __m128 v2)
 	return sub0;
 }
 
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_nrm(__m128 v)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_normalize(__m128 v)
 {
-	__m128 const dot0 = glm_f32v4_dot(v, v);
+	__m128 const dot0 = glm_vec4_dot(v, v);
 	__m128 const isr0 = _mm_rsqrt_ps(dot0);
 	__m128 const mul0 = _mm_mul_ps(v, isr0);
 	return mul0;
 }
 
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_ffd(__m128 N, __m128 I, __m128 Nref)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_faceforward(__m128 N, __m128 I, __m128 Nref)
 {
-	__m128 const dot0 = glm_f32v4_dot(Nref, I);
-	__m128 const sgn0 = glm_f32v4_sgn(dot0);
+	__m128 const dot0 = glm_vec4_dot(Nref, I);
+	__m128 const sgn0 = glm_vec4_sign(dot0);
 	__m128 const mul0 = _mm_mul_ps(sgn0, _mm_set1_ps(-1.0f));
 	__m128 const mul1 = _mm_mul_ps(N, mul0);
 	return mul1;
 }
 
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_rfe(__m128 I, __m128 N)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_reflect(__m128 I, __m128 N)
 {
-	__m128 const dot0 = glm_f32v4_dot(N, I);
+	__m128 const dot0 = glm_vec4_dot(N, I);
 	__m128 const mul0 = _mm_mul_ps(N, dot0);
 	__m128 const mul1 = _mm_mul_ps(mul0, _mm_set1_ps(2.0f));
 	__m128 const sub0 = _mm_sub_ps(I, mul1);
 	return sub0;
 }
 
-GLM_FUNC_QUALIFIER __m128 glm_f32v4_rfa(__m128 I, __m128 N, __m128 eta)
+GLM_FUNC_QUALIFIER __m128 glm_vec4_refract(__m128 I, __m128 N, __m128 eta)
 {
-	__m128 const dot0 = glm_f32v4_dot(N, I);
+	__m128 const dot0 = glm_vec4_dot(N, I);
 	__m128 const mul0 = _mm_mul_ps(eta, eta);
 	__m128 const mul1 = _mm_mul_ps(dot0, dot0);
 	__m128 const sub0 = _mm_sub_ps(_mm_set1_ps(1.0f), mul0);
@@ -110,7 +113,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_rfa(__m128 I, __m128 N, __m128 eta)
 		return _mm_set1_ps(0.0f);
 
 	__m128 const sqt0 = _mm_sqrt_ps(mul2);
-	__m128 const mad0 = glm_f32v4_mad(eta, dot0, sqt0);
+	__m128 const mad0 = glm_vec4_fma(eta, dot0, sqt0);
 	__m128 const mul4 = _mm_mul_ps(mad0, N);
 	__m128 const mul5 = _mm_mul_ps(eta, I);
 	__m128 const sub2 = _mm_sub_ps(mul5, mul4);
diff --git a/glm/simd/matrix.h b/glm/simd/matrix.h
index 91c8adea..a9fb5a83 100644
--- a/glm/simd/matrix.h
+++ b/glm/simd/matrix.h
@@ -380,7 +380,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32m4_det_highp(__m128 const in[4])
 	//	+ m[0][1] * Inverse[1][0]
 	//	+ m[0][2] * Inverse[2][0]
 	//	+ m[0][3] * Inverse[3][0];
-	__m128 Det0 = glm_f32v4_dot(in[0], Row2);
+	__m128 Det0 = glm_vec4_dot(in[0], Row2);
 	return Det0;
 }
 
@@ -444,7 +444,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32m4_detd(__m128 const m[4])
 	//	+ m[0][2] * DetCof[2]
 	//	+ m[0][3] * DetCof[3];
 
-	return glm_f32v4_dot(m[0], DetCof);
+	return glm_vec4_dot(m[0], DetCof);
 }
 
 GLM_FUNC_QUALIFIER __m128 glm_f32m4_det(__m128 const m[4])
@@ -507,7 +507,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32m4_det(__m128 const m[4])
 	//	+ m[0][2] * DetCof[2]
 	//	+ m[0][3] * DetCof[3];
 
-	return glm_f32v4_dot(m[0], DetCof);
+	return glm_vec4_dot(m[0], DetCof);
 }
 
 GLM_FUNC_QUALIFIER void glm_f32m4_inv(__m128 const in[4], __m128 out[4])
@@ -720,7 +720,7 @@ GLM_FUNC_QUALIFIER void glm_f32m4_inv(__m128 const in[4], __m128 out[4])
 	//	+ m[0][1] * Inverse[1][0]
 	//	+ m[0][2] * Inverse[2][0]
 	//	+ m[0][3] * Inverse[3][0];
-	__m128 Det0 = glm_f32v4_dot(in[0], Row2);
+	__m128 Det0 = glm_vec4_dot(in[0], Row2);
 	__m128 Rcp0 = _mm_div_ps(_mm_set1_ps(1.0f), Det0);
 	//__m128 Rcp0 = _mm_rcp_ps(Det0);
 
@@ -941,7 +941,7 @@ GLM_FUNC_QUALIFIER void glm_f32m4_inv_lowp(__m128 const in[4], __m128 out[4])
 	//	+ m[0][1] * Inverse[1][0]
 	//	+ m[0][2] * Inverse[2][0]
 	//	+ m[0][3] * Inverse[3][0];
-	__m128 Det0 = glm_f32v4_dot(in[0], Row2);
+	__m128 Det0 = glm_vec4_dot(in[0], Row2);
 	__m128 Rcp0 = _mm_rcp_ps(Det0);
 	//__m128 Rcp0 = _mm_div_ps(one, Det0);
 	// Inverse /= Determinant;