Improved SIMD and swizzle operators interactions with GCC and Clang, Added raw SIMD API.

This commit is contained in:
Christophe Riccio 2016-05-03 23:57:41 +02:00
parent aca1710c5f
commit 1f71f6fb3a
30 changed files with 1670 additions and 1875 deletions

View File

@ -19,6 +19,10 @@ file(GLOB_RECURSE GTX_SOURCE ./gtx/*.cpp)
file(GLOB_RECURSE GTX_INLINE ./gtx/*.inl)
file(GLOB_RECURSE GTX_HEADER ./gtx/*.hpp)
file(GLOB_RECURSE SIMD_SOURCE ./simd/*.cpp)
file(GLOB_RECURSE SIMD_INLINE ./simd/*.inl)
file(GLOB_RECURSE SIMD_HEADER ./simd/*.h)
source_group("Text Files" FILES ${ROOT_TEXT} ${ROOT_MD})
source_group("Core Files" FILES ${CORE_SOURCE})
source_group("Core Files" FILES ${CORE_INLINE})
@ -29,6 +33,9 @@ source_group("GTC Files" FILES ${GTC_HEADER})
source_group("GTX Files" FILES ${GTX_SOURCE})
source_group("GTX Files" FILES ${GTX_INLINE})
source_group("GTX Files" FILES ${GTX_HEADER})
source_group("SIMD Files" FILES ${SIMD_SOURCE})
source_group("SIMD Files" FILES ${SIMD_INLINE})
source_group("SIMD Files" FILES ${SIMD_HEADER})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)
@ -37,7 +44,8 @@ if(GLM_TEST_ENABLE)
${ROOT_SOURCE} ${ROOT_INLINE} ${ROOT_HEADER}
${CORE_SOURCE} ${CORE_INLINE} ${CORE_HEADER}
${GTC_SOURCE} ${GTC_INLINE} ${GTC_HEADER}
${GTX_SOURCE} ${GTX_INLINE} ${GTX_HEADER})
${GTX_SOURCE} ${GTX_INLINE} ${GTX_HEADER}
${SIMD_SOURCE} ${SIMD_INLINE} ${SIMD_HEADER})
endif(GLM_TEST_ENABLE)
if(GLM_STATIC_LIBRARY_ENABLE)
@ -45,7 +53,8 @@ if(GLM_STATIC_LIBRARY_ENABLE)
${ROOT_SOURCE} ${ROOT_INLINE} ${ROOT_HEADER}
${CORE_SOURCE} ${CORE_INLINE} ${CORE_HEADER}
${GTC_SOURCE} ${GTC_INLINE} ${GTC_HEADER}
${GTX_SOURCE} ${GTX_INLINE} ${GTX_HEADER})
${GTX_SOURCE} ${GTX_INLINE} ${GTX_HEADER}
${SIMD_SOURCE} ${SIMD_INLINE} ${SIMD_HEADER})
endif(GLM_STATIC_LIBRARY_ENABLE)
if(GLM_DYNAMIC_LIBRARY_ENABLE)
@ -53,5 +62,6 @@ if(GLM_DYNAMIC_LIBRARY_ENABLE)
${ROOT_SOURCE} ${ROOT_INLINE} ${ROOT_HEADER}
${CORE_SOURCE} ${CORE_INLINE} ${CORE_HEADER}
${GTC_SOURCE} ${GTC_INLINE} ${GTC_HEADER}
${GTX_SOURCE} ${GTX_INLINE} ${GTX_HEADER})
${GTX_SOURCE} ${GTX_INLINE} ${GTX_HEADER}
${SIMD_SOURCE} ${SIMD_INLINE} ${SIMD_HEADER})
endif(GLM_DYNAMIC_LIBRARY_ENABLE)

View File

@ -734,6 +734,6 @@ namespace detail
}
}//namespace glm
#if GLM_ARCH != GLM_FORCE_PURE && GLM_HAS_ANONYMOUS_UNION && GLM_NOT_BUGGY_VC32BITS
#if GLM_ARCH != GLM_FORCE_PURE
# include "func_common_simd.inl"
#endif

View File

@ -1,38 +1,6 @@
namespace glm{
namespace detail
{
// Silence MSVC warnings 4510, 4512 and 4610 for the union below
// (presumably triggered by its const member -- verify against the MSVC docs).
#if(GLM_COMPILER & GLM_COMPILER_VC)
#pragma warning(push)
#pragma warning(disable : 4510 4512 4610)
#endif
/// Helper whose float member `f` is read to build SIMD constants.
/// NOTE(review): the constructor sets `f` to 0.0; the commented-out initialisers show
/// the bit pattern (mantissa 0x7FFFFF, exponent 0xFF, sign 0) that was apparently intended.
union ieee754_QNAN
{
const float f;
// Declares a nested type only: no data member of type `i` exists, so these
// bit-fields do not overlay `f`.
struct i
{
const unsigned int mantissa:23, exp:8, sign:1;
};
ieee754_QNAN() : f(0.0)/*, mantissa(0x7FFFFF), exp(0xFF), sign(0x0)*/ {}
};
#if(GLM_COMPILER & GLM_COMPILER_VC)
#pragma warning(pop)
#endif
// Shared SSE constants; each float constant holds the same value in all four lanes.
static const __m128 GLM_VAR_USED zero = _mm_setzero_ps();
static const __m128 GLM_VAR_USED one = _mm_set_ps1(1.0f);
static const __m128 GLM_VAR_USED minus_one = _mm_set_ps1(-1.0f);
static const __m128 GLM_VAR_USED two = _mm_set_ps1(2.0f);
static const __m128 GLM_VAR_USED three = _mm_set_ps1(3.0f);
// Kept so existing references to absMask still compile; its float member is 0.0f.
static const ieee754_QNAN absMask;
// Every bit set except the IEEE-754 sign bit. Built from the integer pattern because
// absMask.f is 0.0f, which would give an all-zero mask.
static const __m128 GLM_VAR_USED abs4Mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
// Only the sign bit set in every lane.
static const __m128 GLM_VAR_USED _epi32_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(static_cast<int>(0x80000000)));
// 2^23, used by the add/subtract rounding helpers.
static const __m128 GLM_VAR_USED _ps_2pow23 = _mm_set_ps1(8388608.0f);
static const __m128 GLM_VAR_USED _ps_1 = _mm_set_ps1(1.0f);
/*
static const __m128 GLM_VAR_USED zero = _mm_setzero_ps();
static const __m128 GLM_VAR_USED one = _mm_set_ps1(1.0f);
@ -137,151 +105,5 @@ namespace detail
static const __m128 GLM_VAR_USED _ps_log2_c0 = _mm_set_ps1(1.44269504088896340735992f);
*/
/// Per-lane absolute value of x.
/// Clears the sign bit of every lane using the sign-bit mask, so the result does not
/// depend on the contents of abs4Mask.
GLM_FUNC_QUALIFIER __m128 abs_ps(__m128 x)
{
	// andnot(a, b) computes (~a) & b: keep everything except the sign bit.
	return _mm_andnot_ps(_epi32_sign_mask, x);
}
/// sign: per lane, -1.0f where x < 0, 1.0f where x > 0, and all bits clear otherwise.
GLM_FUNC_QUALIFIER __m128 sgn_ps(__m128 x)
{
	// Each comparison yields an all-ones/all-zeros lane mask that selects the constant.
	__m128 const negative = _mm_and_ps(_mm_cmplt_ps(x, zero), _mm_set1_ps(-1.0f));
	__m128 const positive = _mm_and_ps(_mm_cmpgt_ps(x, zero), _mm_set1_ps(1.0f));
	return _mm_or_ps(negative, positive);
}
/// round: adds and then subtracts 2^23 carrying the sign of x, per lane.
/// NOTE(review): presumably relies on the default round-to-nearest mode and on
/// |x| < 2^23 -- confirm before using with large magnitudes.
GLM_FUNC_QUALIFIER __m128 rnd_ps(__m128 x)
{
	// 2^23 with the sign bit copied from x.
	__m128 const magic = _mm_or_ps(_mm_and_ps(_epi32_sign_mask, x), _ps_2pow23);
	return _mm_sub_ps(_mm_add_ps(x, magic), magic);
}
/// floor: rounds with rnd_ps, then subtracts 1 in the lanes where the rounded value is above x.
GLM_FUNC_QUALIFIER __m128 flr_ps(__m128 x)
{
	__m128 const rounded = rnd_ps(x);
	// 1.0f in lanes where x < rounded, 0 elsewhere.
	__m128 const correction = _mm_and_ps(_mm_cmplt_ps(x, rounded), glm::detail::_ps_1);
	return _mm_sub_ps(rounded, correction);
}
//trunc
//GLM_FUNC_QUALIFIER __m128 _mm_trc_ps(__m128 v)
//{
// return __m128();
//}
/// roundEven: same add/subtract-2^23 sequence as rnd_ps.
/// NOTE(review): tie handling presumably follows the current rounding mode -- confirm.
GLM_FUNC_QUALIFIER __m128 rde_ps(__m128 x)
{
	// 2^23 with the sign bit copied from x.
	__m128 const magic = _mm_or_ps(_mm_and_ps(_epi32_sign_mask, x), _ps_2pow23);
	return _mm_sub_ps(_mm_add_ps(x, magic), magic);
}
/// ceil: rounds with rnd_ps, then adds 1 in the lanes where the rounded value is below x.
GLM_FUNC_QUALIFIER __m128 ceil_ps(__m128 x)
{
	__m128 const rounded = rnd_ps(x);
	// 1.0f in lanes where x > rounded, 0 elsewhere.
	__m128 const correction = _mm_and_ps(_mm_cmpgt_ps(x, rounded), _ps_1);
	return _mm_add_ps(rounded, correction);
}
/// fract: x - flr_ps(x), per lane.
GLM_FUNC_QUALIFIER __m128 frc_ps(__m128 x)
{
	return _mm_sub_ps(x, flr_ps(x));
}
/// mod: x - y * flr_ps(x / y), per lane.
GLM_FUNC_QUALIFIER __m128 mod_ps(__m128 x, __m128 y)
{
	__m128 const whole = flr_ps(_mm_div_ps(x, y));
	return _mm_sub_ps(x, _mm_mul_ps(y, whole));
}
/// clamp: max(min(v, maxVal), minVal), per lane.
GLM_FUNC_QUALIFIER __m128 clp_ps(__m128 v, __m128 minVal, __m128 maxVal)
{
	// Upper bound first, then lower bound, with the same operand order as before.
	return _mm_max_ps(_mm_min_ps(v, maxVal), minVal);
}
/// mix: v1 * (1 - a) + v2 * a, per lane.
GLM_FUNC_QUALIFIER __m128 mix_ps(__m128 v1, __m128 v2, __m128 a)
{
	__m128 const from_v1 = _mm_mul_ps(v1, _mm_sub_ps(one, a));
	__m128 const from_v2 = _mm_mul_ps(v2, a);
	return _mm_add_ps(from_v1, from_v2);
}
/// step: per lane, 0.0f where x < edge and 1.0f otherwise (GLSL step semantics).
/// Each lane is decided independently; a lane with x == edge yields 1.0f.
GLM_FUNC_QUALIFIER __m128 stp_ps(__m128 edge, __m128 x)
{
	// All-ones mask in lanes where NOT (x < edge).
	__m128 const not_below = _mm_cmpnlt_ps(x, edge);
	// Masking 1.0f gives 1.0f in selected lanes and +0.0f in the others.
	return _mm_and_ps(not_below, one);
}
/// smoothstep: t = clamp((x - edge0) / (edge1 - edge0), 0, 1); returns t * t * (3 - 2 * t), per lane.
/// No guard is applied for edge0 == edge1 (division by zero in that lane).
GLM_FUNC_QUALIFIER __m128 ssp_ps(__m128 edge0, __m128 edge1, __m128 x)
{
	__m128 const sub0 = _mm_sub_ps(x, edge0);
	__m128 const sub1 = _mm_sub_ps(edge1, edge0);
	// Normalise x into the [edge0, edge1] range: this is a division, not a subtraction.
	__m128 const div0 = _mm_div_ps(sub0, sub1);
	__m128 const clp0 = clp_ps(div0, zero, one);
	__m128 const mul0 = _mm_mul_ps(two, clp0);
	__m128 const sub2 = _mm_sub_ps(three, mul0);
	__m128 const mul1 = _mm_mul_ps(clp0, clp0);
	return _mm_mul_ps(mul1, sub2);
}
/// \todo
//GLM_FUNC_QUALIFIER __m128 sse_nan_ps(__m128 x)
//{
// __m128 empty;
// return empty;
//}
/// \todo
//GLM_FUNC_QUALIFIER __m128 sse_inf_ps(__m128 x)
//{
// __m128 empty;
// return empty;
//}
// SSE scalar reciprocal sqrt using the rsqrt op, plus one Newton-Raphson iteration
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
// Lane 0 holds 0.5 * r * (3 - x * r * r), where r = _mm_rsqrt_ss(x).
GLM_FUNC_QUALIFIER __m128 sqrt_wip_ss(__m128 x)
{
	__m128 const estimate = _mm_rsqrt_ss(x); // hardware estimate
	__m128 const estimate_sq = _mm_mul_ss(estimate, estimate);
	__m128 const correction = _mm_sub_ss(three, _mm_mul_ss(x, estimate_sq));
	__m128 const half_estimate = _mm_mul_ss(_mm_set_ps1(0.5f), estimate);
	return _mm_mul_ss(half_estimate, correction);
}
}//namespace detail
}//namespace glm

View File

@ -171,6 +171,6 @@ namespace detail
}
}//namespace glm
#if GLM_ARCH != GLM_FORCE_PURE && GLM_HAS_ANONYMOUS_UNION && GLM_NOT_BUGGY_VC32BITS
#if GLM_ARCH != GLM_FORCE_PURE
# include "func_geometric_simd.inl"
#endif

View File

@ -1,119 +1,22 @@
#include "../simd/geometric.h"
namespace glm{
namespace detail
{
/// Four-component dot product of v1 and v2; the sum is written to every lane.
GLM_FUNC_QUALIFIER __m128 dot_ps(__m128 v1, __m128 v2)
{
#	if GLM_ARCH & GLM_ARCH_AVX
		return _mm_dp_ps(v1, v2, 0xff);
#	else
		__m128 const products = _mm_mul_ps(v1, v2);
		// Add neighbouring lanes: (0+1, 0+1, 2+3, 2+3).
		__m128 const pair_sums = _mm_add_ps(products, _mm_shuffle_ps(products, products, _MM_SHUFFLE(2, 3, 0, 1)));
		// Add the reversed vector so every lane holds the full sum.
		return _mm_add_ps(pair_sums, _mm_shuffle_ps(pair_sums, pair_sums, _MM_SHUFFLE(0, 1, 2, 3)));
#	endif
}
/// Four-component dot product of v1 and v2; only lane 0 of the result holds the full sum.
GLM_FUNC_QUALIFIER __m128 dot_ss(__m128 v1, __m128 v2)
{
	__m128 const products = _mm_mul_ps(v1, v2);
	// Fold the upper half onto the lower half: lanes 0 and 1 hold p0+p2 and p1+p3.
	__m128 const halves = _mm_add_ps(_mm_movehl_ps(products, products), products);
	// Bring lane 1 down to lane 0 and add it there.
	return _mm_add_ss(halves, _mm_shuffle_ps(halves, halves, 1));
}
/// Length of x: square root of dot_ps(x, x), per lane.
GLM_FUNC_QUALIFIER __m128 len_ps(__m128 x)
{
	return _mm_sqrt_ps(dot_ps(x, x));
}
/// Distance between p0 and p1: len_ps(p0 - p1).
GLM_FUNC_QUALIFIER __m128 dst_ps(__m128 p0, __m128 p1)
{
	return len_ps(_mm_sub_ps(p0, p1));
}
/// Cross product of the x, y, z lanes of v1 and v2.
/// Lane 3 is computed as v1.w * v2.w - v1.w * v2.w.
GLM_FUNC_QUALIFIER __m128 xpd_ps(__m128 v1, __m128 v2)
{
	// Rotated copies: (y, z, x, w) and (z, x, y, w).
	__m128 const a_yzx = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
	__m128 const a_zxy = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
	__m128 const b_yzx = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
	__m128 const b_zxy = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
	return _mm_sub_ps(_mm_mul_ps(a_yzx, b_zxy), _mm_mul_ps(a_zxy, b_yzx));
}
/// Normalises v by multiplying it with _mm_rsqrt_ps(dot_ps(v, v)).
GLM_FUNC_QUALIFIER __m128 nrm_ps(__m128 v)
{
	__m128 const inv_len = _mm_rsqrt_ps(dot_ps(v, v));
	return _mm_mul_ps(v, inv_len);
}
/// faceforward: returns N where dot(Nref, I) < 0 and -N otherwise (GLSL semantics).
/// A dot product of exactly zero yields -N rather than a zero vector.
GLM_FUNC_QUALIFIER __m128 ffd_ps(__m128 N, __m128 I, __m128 Nref)
{
	__m128 const dot0 = dot_ps(Nref, I);
	__m128 const neg0 = _mm_mul_ps(N, glm::detail::minus_one);
	// All-ones mask in lanes where the dot product is strictly negative.
	__m128 const keep = _mm_cmplt_ps(dot0, _mm_setzero_ps());
	// Select N under the mask and -N everywhere else.
	return _mm_or_ps(_mm_and_ps(keep, N), _mm_andnot_ps(keep, neg0));
}
/// reflect: I - (N * dot_ps(N, I)) * 2.
GLM_FUNC_QUALIFIER __m128 rfe_ps(__m128 I, __m128 N)
{
	__m128 const projected = _mm_mul_ps(N, dot_ps(N, I));
	return _mm_sub_ps(I, _mm_mul_ps(projected, glm::detail::two));
}
/// refract (GLSL semantics):
///   k = 1 - eta^2 * (1 - dot(N, I)^2)
///   result = k < 0 ? 0 : eta * I - (eta * dot(N, I) + sqrt(k)) * N
/// The k < 0 case is handled per lane with a mask instead of a branch.
GLM_FUNC_QUALIFIER __m128 rfa_ps(__m128 I, __m128 N, __m128 eta)
{
	__m128 const dot0 = dot_ps(N, I);
	__m128 const eta2 = _mm_mul_ps(eta, eta);
	__m128 const dot2 = _mm_mul_ps(dot0, dot0);
	__m128 const k = _mm_sub_ps(glm::detail::one, _mm_mul_ps(eta2, _mm_sub_ps(glm::detail::one, dot2)));
	// Clamp before the square root so lanes with k < 0 do not compute sqrt of a negative;
	// with this operand order a NaN in k is passed through.
	__m128 const sqt0 = _mm_sqrt_ps(_mm_max_ps(glm::detail::zero, k));
	__m128 const add0 = _mm_add_ps(_mm_mul_ps(eta, dot0), sqt0);
	__m128 const sub0 = _mm_sub_ps(_mm_mul_ps(eta, I), _mm_mul_ps(add0, N));
	// Total internal reflection: clear the lanes where k < 0.
	return _mm_andnot_ps(_mm_cmplt_ps(k, glm::detail::zero), sub0);
}
template <>
struct compute_dot<tvec4, float, simd>
{
GLM_FUNC_QUALIFIER static float call(tvec4<float, simd> const& x, tvec4<float, simd> const& y)
# if GLM_HAS_UNRESTRICTED_UNIONS
template <>
struct compute_dot<tvec4, float, simd>
{
__m128 const dot0 = dot_ss(x.data, y.data);
GLM_FUNC_QUALIFIER static float call(tvec4<float, simd> const& x, tvec4<float, simd> const& y)
{
__m128 const dot0 = glm_dot_ss(x.data, y.data);
float Result = 0;
_mm_store_ss(&Result, dot0);
return Result;
}
};
float Result = 0;
_mm_store_ss(&Result, dot0);
return Result;
}
};
# endif//GLM_HAS_UNRESTRICTED_UNIONS
}//namespace detail
}//namespace glm

View File

@ -359,7 +359,7 @@ namespace detail
}
}//namespace glm
#if GLM_ARCH != GLM_FORCE_PURE && GLM_HAS_ANONYMOUS_UNION && GLM_NOT_BUGGY_VC32BITS
#if GLM_ARCH != GLM_FORCE_PURE
# include "func_integer_simd.inl"
#endif

View File

@ -279,7 +279,7 @@ namespace detail
}
}//namespace glm
#if GLM_ARCH != GLM_FORCE_PURE && GLM_HAS_ANONYMOUS_UNION && GLM_NOT_BUGGY_VC32BITS
#if GLM_ARCH != GLM_FORCE_PURE
# include "func_matrix_simd.inl"
#endif

File diff suppressed because it is too large Load Diff

View File

@ -5,6 +5,7 @@
#include <cassert>
#include <cstddef>
#include "../simd/platform.h"
///////////////////////////////////////////////////////////////////////////////////
// Version
@ -20,285 +21,6 @@
# pragma message ("GLM: version 0.9.8.0")
#endif//GLM_MESSAGE
///////////////////////////////////////////////////////////////////////////////////
// Platform
#define GLM_PLATFORM_UNKNOWN 0x00000000
#define GLM_PLATFORM_WINDOWS 0x00010000
#define GLM_PLATFORM_LINUX 0x00020000
#define GLM_PLATFORM_APPLE 0x00040000
//#define GLM_PLATFORM_IOS 0x00080000
#define GLM_PLATFORM_ANDROID 0x00100000
#define GLM_PLATFORM_CHROME_NACL 0x00200000
#define GLM_PLATFORM_UNIX 0x00400000
#define GLM_PLATFORM_QNXNTO 0x00800000
#define GLM_PLATFORM_WINCE 0x01000000
#define GLM_PLATFORM_CYGWIN 0x02000000
#ifdef GLM_FORCE_PLATFORM_UNKNOWN
# define GLM_PLATFORM GLM_PLATFORM_UNKNOWN
#elif defined(__CYGWIN__)
# define GLM_PLATFORM GLM_PLATFORM_CYGWIN
#elif defined(__QNXNTO__)
# define GLM_PLATFORM GLM_PLATFORM_QNXNTO
#elif defined(__APPLE__)
# define GLM_PLATFORM GLM_PLATFORM_APPLE
#elif defined(WINCE)
# define GLM_PLATFORM GLM_PLATFORM_WINCE
#elif defined(_WIN32)
# define GLM_PLATFORM GLM_PLATFORM_WINDOWS
#elif defined(__native_client__)
# define GLM_PLATFORM GLM_PLATFORM_CHROME_NACL
#elif defined(__ANDROID__)
# define GLM_PLATFORM GLM_PLATFORM_ANDROID
#elif defined(__linux)
# define GLM_PLATFORM GLM_PLATFORM_LINUX
#elif defined(__unix)
# define GLM_PLATFORM GLM_PLATFORM_UNIX
#else
# define GLM_PLATFORM GLM_PLATFORM_UNKNOWN
#endif//
// Report platform detection
#if(defined(GLM_MESSAGES) && !defined(GLM_MESSAGE_PLATFORM_DISPLAYED))
# define GLM_MESSAGE_PLATFORM_DISPLAYED
# if(GLM_PLATFORM & GLM_PLATFORM_QNXNTO)
# pragma message("GLM: QNX platform detected")
//# elif(GLM_PLATFORM & GLM_PLATFORM_IOS)
//# pragma message("GLM: iOS platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_APPLE)
# pragma message("GLM: Apple platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_WINCE)
# pragma message("GLM: WinCE platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_WINDOWS)
# pragma message("GLM: Windows platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_CHROME_NACL)
# pragma message("GLM: Native Client detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_ANDROID)
# pragma message("GLM: Android platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_LINUX)
# pragma message("GLM: Linux platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_UNIX)
# pragma message("GLM: UNIX platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_UNKNOWN)
# pragma message("GLM: platform unknown")
# else
# pragma message("GLM: platform not detected")
# endif
#endif//GLM_MESSAGE
///////////////////////////////////////////////////////////////////////////////////
// Compiler
// User defines: GLM_FORCE_COMPILER_UNKNOWN
// TODO ? __llvm__
#define GLM_COMPILER_UNKNOWN 0x00000000
// Intel
#define GLM_COMPILER_INTEL 0x00100000
#define GLM_COMPILER_INTEL12 0x00100010
#define GLM_COMPILER_INTEL12_1 0x00100020
#define GLM_COMPILER_INTEL13 0x00100030
#define GLM_COMPILER_INTEL14 0x00100040
#define GLM_COMPILER_INTEL15 0x00100050
#define GLM_COMPILER_INTEL16 0x00100060
// Visual C++ defines
#define GLM_COMPILER_VC 0x01000000
#define GLM_COMPILER_VC2010 0x01000090
#define GLM_COMPILER_VC2012 0x010000A0
#define GLM_COMPILER_VC2013 0x010000B0
#define GLM_COMPILER_VC2015 0x010000C0
// GCC defines
// The family bit is 0x02000000; the low bits grow with the version so that
// GLM_COMPILER >= GLM_COMPILER_GCCxx comparisons work.
// GCC42 and GCC43 are defined because the G++ detection chain selects them for GCC 4.2 / 4.3.
#define GLM_COMPILER_GCC 0x02000000
#define GLM_COMPILER_GCC42 0x02000090
#define GLM_COMPILER_GCC43 0x020000A0
#define GLM_COMPILER_GCC44 0x020000B0
#define GLM_COMPILER_GCC45 0x020000C0
#define GLM_COMPILER_GCC46 0x020000D0
#define GLM_COMPILER_GCC47 0x020000E0
#define GLM_COMPILER_GCC48 0x020000F0
#define GLM_COMPILER_GCC49 0x02000100
#define GLM_COMPILER_GCC50 0x02000200
#define GLM_COMPILER_GCC51 0x02000300
#define GLM_COMPILER_GCC52 0x02000400
#define GLM_COMPILER_GCC53 0x02000500
#define GLM_COMPILER_GCC60 0x02000600
// CUDA
#define GLM_COMPILER_CUDA 0x10000000
#define GLM_COMPILER_CUDA40 0x10000040
#define GLM_COMPILER_CUDA41 0x10000050
#define GLM_COMPILER_CUDA42 0x10000060
#define GLM_COMPILER_CUDA50 0x10000070
#define GLM_COMPILER_CUDA60 0x10000080
#define GLM_COMPILER_CUDA65 0x10000090
#define GLM_COMPILER_CUDA70 0x100000A0
#define GLM_COMPILER_CUDA75 0x100000B0
// LLVM
// The family bit is 0x20000000; the low bits grow with the version.
// LLVM30 and LLVM31 are defined because the Clang detection chain selects them for Clang 3.0 / 3.1.
#define GLM_COMPILER_LLVM 0x20000000
#define GLM_COMPILER_LLVM30 0x20000010
#define GLM_COMPILER_LLVM31 0x20000020
#define GLM_COMPILER_LLVM32 0x20000030
#define GLM_COMPILER_LLVM33 0x20000040
#define GLM_COMPILER_LLVM34 0x20000050
#define GLM_COMPILER_LLVM35 0x20000060
#define GLM_COMPILER_LLVM36 0x20000070
#define GLM_COMPILER_LLVM37 0x20000080
#define GLM_COMPILER_LLVM38 0x20000090
#define GLM_COMPILER_LLVM39 0x200000A0
// Apple Clang
#define GLM_COMPILER_APPLE_CLANG 0x40000000
#define GLM_COMPILER_APPLE_CLANG40 0x40000010
#define GLM_COMPILER_APPLE_CLANG41 0x40000020
#define GLM_COMPILER_APPLE_CLANG42 0x40000030
#define GLM_COMPILER_APPLE_CLANG50 0x40000040
#define GLM_COMPILER_APPLE_CLANG51 0x40000050
#define GLM_COMPILER_APPLE_CLANG60 0x40000060
#define GLM_COMPILER_APPLE_CLANG61 0x40000070
// Build model
#define GLM_MODEL_32 0x00000010
#define GLM_MODEL_64 0x00000020
// Force generic C++ compiler
#ifdef GLM_FORCE_COMPILER_UNKNOWN
# define GLM_COMPILER GLM_COMPILER_UNKNOWN
#elif defined(__INTEL_COMPILER)
# if __INTEL_COMPILER == 1200
# define GLM_COMPILER GLM_COMPILER_INTEL12
# elif __INTEL_COMPILER == 1210
# define GLM_COMPILER GLM_COMPILER_INTEL12_1
# elif __INTEL_COMPILER == 1300
# define GLM_COMPILER GLM_COMPILER_INTEL13
# elif __INTEL_COMPILER == 1400
# define GLM_COMPILER GLM_COMPILER_INTEL14
# elif __INTEL_COMPILER == 1500
# define GLM_COMPILER GLM_COMPILER_INTEL15
# elif __INTEL_COMPILER >= 1600
# define GLM_COMPILER GLM_COMPILER_INTEL16
# else
# define GLM_COMPILER GLM_COMPILER_INTEL
# endif
// CUDA
#elif defined(__CUDACC__)
# if !defined(CUDA_VERSION) && !defined(GLM_FORCE_CUDA)
# include <cuda.h> // make sure version is defined since nvcc does not define it itself!
# endif
# if CUDA_VERSION < 3000
# error "GLM requires CUDA 3.0 or higher"
# else
# define GLM_COMPILER GLM_COMPILER_CUDA
# endif
// Visual C++
#elif defined(_MSC_VER)
# if _MSC_VER < 1600
# error "GLM requires Visual C++ 2010 or higher"
# elif _MSC_VER == 1600
# define GLM_COMPILER GLM_COMPILER_VC2010
# elif _MSC_VER == 1700
# define GLM_COMPILER GLM_COMPILER_VC2012
# elif _MSC_VER == 1800
# define GLM_COMPILER GLM_COMPILER_VC2013
# elif _MSC_VER >= 1900
# define GLM_COMPILER GLM_COMPILER_VC2015
# else//_MSC_VER
# define GLM_COMPILER GLM_COMPILER_VC
# endif//_MSC_VER
// Clang
#elif defined(__clang__)
# if GLM_PLATFORM & GLM_PLATFORM_APPLE
# if __clang_major__ == 4 && __clang_minor__ == 0
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG40
# elif __clang_major__ == 4 && __clang_minor__ == 1
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG41
# elif __clang_major__ == 4 && __clang_minor__ == 2
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG42
# elif __clang_major__ == 5 && __clang_minor__ == 0
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG50
# elif __clang_major__ == 5 && __clang_minor__ == 1
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG51
# elif __clang_major__ == 6 && __clang_minor__ == 0
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG60
# elif __clang_major__ == 6 && __clang_minor__ >= 1
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG61
# elif __clang_major__ >= 7
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG61
# else
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG
# endif
# else
# if __clang_major__ == 3 && __clang_minor__ == 0
# define GLM_COMPILER GLM_COMPILER_LLVM30
# elif __clang_major__ == 3 && __clang_minor__ == 1
# define GLM_COMPILER GLM_COMPILER_LLVM31
# elif __clang_major__ == 3 && __clang_minor__ == 2
# define GLM_COMPILER GLM_COMPILER_LLVM32
# elif __clang_major__ == 3 && __clang_minor__ == 3
# define GLM_COMPILER GLM_COMPILER_LLVM33
# elif __clang_major__ == 3 && __clang_minor__ == 4
# define GLM_COMPILER GLM_COMPILER_LLVM34
# elif __clang_major__ == 3 && __clang_minor__ == 5
# define GLM_COMPILER GLM_COMPILER_LLVM35
# elif __clang_major__ == 3 && __clang_minor__ == 6
# define GLM_COMPILER GLM_COMPILER_LLVM36
# elif __clang_major__ == 3 && __clang_minor__ == 7
# define GLM_COMPILER GLM_COMPILER_LLVM37
# elif __clang_major__ == 3 && __clang_minor__ == 8
# define GLM_COMPILER GLM_COMPILER_LLVM38
# elif __clang_major__ == 3 && __clang_minor__ >= 9
# define GLM_COMPILER GLM_COMPILER_LLVM39
# elif __clang_major__ >= 4
# define GLM_COMPILER GLM_COMPILER_LLVM39
# else
# define GLM_COMPILER GLM_COMPILER_LLVM
# endif
# endif
// G++
#elif defined(__GNUC__) || defined(__MINGW32__)
# if (__GNUC__ == 4) && (__GNUC_MINOR__ == 2)
# define GLM_COMPILER (GLM_COMPILER_GCC42)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 3)
# define GLM_COMPILER (GLM_COMPILER_GCC43)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 4)
# define GLM_COMPILER (GLM_COMPILER_GCC44)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 5)
# define GLM_COMPILER (GLM_COMPILER_GCC45)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 6)
# define GLM_COMPILER (GLM_COMPILER_GCC46)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 7)
# define GLM_COMPILER (GLM_COMPILER_GCC47)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 8)
# define GLM_COMPILER (GLM_COMPILER_GCC48)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ >= 9)
# define GLM_COMPILER (GLM_COMPILER_GCC49)
# elif (__GNUC__ == 5) && (__GNUC_MINOR__ == 0)
# define GLM_COMPILER (GLM_COMPILER_GCC50)
# elif (__GNUC__ == 5) && (__GNUC_MINOR__ == 1)
# define GLM_COMPILER (GLM_COMPILER_GCC51)
# elif (__GNUC__ == 5) && (__GNUC_MINOR__ == 2)
# define GLM_COMPILER (GLM_COMPILER_GCC52)
# elif (__GNUC__ == 5) && (__GNUC_MINOR__ >= 3)
# define GLM_COMPILER (GLM_COMPILER_GCC53)
# elif (__GNUC__ >= 6)
# define GLM_COMPILER (GLM_COMPILER_GCC60)
# else
# define GLM_COMPILER (GLM_COMPILER_GCC)
# endif
#else
# define GLM_COMPILER GLM_COMPILER_UNKNOWN
#endif
#ifndef GLM_COMPILER
#error "GLM_COMPILER undefined, your compiler may not be supported by GLM. Add #define GLM_COMPILER 0 to ignore this message."
#endif//GLM_COMPILER
// Report compiler detection
#if defined(GLM_MESSAGES) && !defined(GLM_MESSAGE_COMPILER_DISPLAYED)
# define GLM_MESSAGE_COMPILER_DISPLAYED
@ -323,7 +45,7 @@
// Build model
#if defined(__arch64__) || defined(__LP64__) || defined(_M_X64) || defined(__ppc64__) || defined(__x86_64__)
# define GLM_MODEL GLM_MODEL_64
# define GLM_MODEL GLM_MODEL_64
#elif defined(__i386__) || defined(__ppc__)
# define GLM_MODEL GLM_MODEL_32
#else
@ -344,7 +66,7 @@
#endif//GLM_MESSAGE
///////////////////////////////////////////////////////////////////////////////////
// Platform
// Instruction sets
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 GLM_FORCE_AVX2
@ -689,16 +411,23 @@
((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC2013))))
#endif
// N2544 Unrestricted unions
// N2544 Unrestricted unions http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf
#define GLM_NOT_BUGGY_VC32BITS (!(GLM_MODEL == GLM_MODEL_32 && (GLM_COMPILER & GLM_COMPILER_VC) && GLM_COMPILER < GLM_COMPILER_VC2013))
// Clang/Apple Clang: ask the compiler through __has_feature.
// C++11 or MS-extension language mode: defined as 1.
// Otherwise: decide from the C++0x flag and the compiler version. The whole expression is
// parenthesised so that uses such as !GLM_HAS_UNRESTRICTED_UNIONS apply to all of it.
#if GLM_COMPILER & (GLM_COMPILER_LLVM | GLM_COMPILER_APPLE_CLANG)
# define GLM_HAS_UNRESTRICTED_UNIONS __has_feature(cxx_unrestricted_unions)
#elif GLM_LANG & (GLM_LANG_CXX11_FLAG | GLM_LANG_CXXMS_FLAG)
# define GLM_HAS_UNRESTRICTED_UNIONS 1
#else
# define GLM_HAS_UNRESTRICTED_UNIONS ((GLM_LANG & GLM_LANG_CXX0X_FLAG) && (\
((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_LANG & GLM_LANG_CXXMS_FLAG)) || \
((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC2015)) || \
((GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC46))))
#endif
//#define GLM_HAS_ANONYMOUS_UNION (((GLM_LANG & GLM_LANG_CXXMS_FLAG) | (GLM_LANG & GLM_LANG_CXX11_FLAG)) && GLM_NOT_BUGGY_VC32BITS)
// N2346
#if GLM_COMPILER & (GLM_COMPILER_LLVM | GLM_COMPILER_APPLE_CLANG)
# define GLM_HAS_DEFAULTED_FUNCTIONS __has_feature(cxx_defaulted_functions)
@ -803,9 +532,6 @@
# endif// GLM_COMPILER & GLM_COMPILER_VC
#endif
// Not standard
#define GLM_HAS_ANONYMOUS_UNION (GLM_LANG & GLM_LANG_CXXMS_FLAG)
///////////////////////////////////////////////////////////////////////////////////
// Static assert

View File

@ -759,8 +759,6 @@ namespace detail
}
}//namespace glm
#if GLM_HAS_ANONYMOUS_UNION && GLM_NOT_BUGGY_VC32BITS
#if GLM_ARCH & GLM_ARCH_SSE2
# include "type_mat4x4_sse2.inl"
#if GLM_ARCH != GLM_FORCE_PURE
# include "type_mat4x4_simd.inl"
#endif
#endif//

View File

@ -35,7 +35,7 @@
#include "../fwd.hpp"
#include "type_vec.hpp"
#ifdef GLM_SWIZZLE
# if GLM_HAS_ANONYMOUS_UNION
# if GLM_HAS_UNRESTRICTED_UNIONS
# include "_swizzle.hpp"
# else
# include "_swizzle_func.hpp"
@ -56,7 +56,7 @@ namespace glm
// -- Data --
# if GLM_HAS_ANONYMOUS_UNION
# if GLM_HAS_UNRESTRICTED_UNIONS
union
{
T x;
@ -122,13 +122,13 @@ namespace glm
// -- Swizzle constructors --
# if(GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE))
# if(GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE))
template <int E0>
GLM_FUNC_DECL tvec1(detail::_swizzle<1, T, P, tvec1<T, P>, E0, -1,-2,-3> const & that)
{
*this = that();
}
# endif//(GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE))
# endif//(GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE))
// -- Unary arithmetic operators --

View File

@ -34,7 +34,7 @@
#include "type_vec.hpp"
#ifdef GLM_SWIZZLE
# if GLM_HAS_ANONYMOUS_UNION
# if GLM_HAS_UNRESTRICTED_UNIONS
# include "_swizzle.hpp"
# else
# include "_swizzle_func.hpp"
@ -55,7 +55,7 @@ namespace glm
// -- Data --
# if GLM_HAS_ANONYMOUS_UNION
# if GLM_HAS_UNRESTRICTED_UNIONS
union
{
struct{ T x, y; };
@ -128,13 +128,13 @@ namespace glm
// -- Swizzle constructors --
# if GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE)
# if GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE)
template <int E0, int E1>
GLM_FUNC_DECL tvec2(detail::_swizzle<2, T, P, tvec2<T, P>, E0, E1,-1,-2> const & that)
{
*this = that();
}
# endif// GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE)
# endif// GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE)
// -- Unary arithmetic operators --

View File

@ -34,7 +34,7 @@
#include "type_vec.hpp"
#ifdef GLM_SWIZZLE
# if GLM_HAS_ANONYMOUS_UNION
# if GLM_HAS_UNRESTRICTED_UNIONS
# include "_swizzle.hpp"
# else
# include "_swizzle_func.hpp"
@ -55,7 +55,7 @@ namespace glm
// -- Data --
# if GLM_HAS_ANONYMOUS_UNION
# if GLM_HAS_UNRESTRICTED_UNIONS
union
{
struct{ T x, y, z; };
@ -138,7 +138,7 @@ namespace glm
// -- Swizzle constructors --
# if GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE)
# if GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE)
template <int E0, int E1, int E2>
GLM_FUNC_DECL tvec3(detail::_swizzle<3, T, P, tvec3<T, P>, E0, E1, E2, -1> const & that)
{
@ -156,7 +156,7 @@ namespace glm
{
*this = tvec3<T, P>(scalar, v());
}
# endif// GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE)
# endif// GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE)
// -- Unary arithmetic operators --

View File

@ -52,9 +52,7 @@ namespace detail
typedef T type[4];
};
# define GLM_NOT_BUGGY_VC32BITS !(GLM_MODEL == GLM_MODEL_32 && (GLM_COMPILER & GLM_COMPILER_VC) && GLM_COMPILER < GLM_COMPILER_VC2013)
# if (GLM_ARCH & GLM_ARCH_SSE2) && GLM_NOT_BUGGY_VC32BITS
# if (GLM_ARCH & GLM_ARCH_SSE2)
template <>
struct simd_data<float, simd>
{
@ -74,7 +72,7 @@ namespace detail
};
# endif
# if (GLM_ARCH & GLM_ARCH_AVX) && GLM_NOT_BUGGY_VC32BITS
# if (GLM_ARCH & GLM_ARCH_AVX)
template <>
struct simd_data<double, simd>
{
@ -82,7 +80,7 @@ namespace detail
};
# endif
# if (GLM_ARCH & GLM_ARCH_AVX2) && GLM_NOT_BUGGY_VC32BITS
# if (GLM_ARCH & GLM_ARCH_AVX2)
template <>
struct simd_data<int64, simd>
{
@ -109,7 +107,7 @@ namespace detail
// -- Data --
# if GLM_HAS_ANONYMOUS_UNION && GLM_NOT_BUGGY_VC32BITS
# if GLM_HAS_UNRESTRICTED_UNIONS
union
{
struct { T x, y, z, w;};
@ -213,7 +211,7 @@ namespace detail
// -- Swizzle constructors --
# if GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE)
# if GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE)
template <int E0, int E1, int E2, int E3>
GLM_FUNC_DECL tvec4(detail::_swizzle<4, T, P, tvec4<T, P>, E0, E1, E2, E3> const & that)
{
@ -255,7 +253,7 @@ namespace detail
{
*this = tvec4<T, P>(x, v());
}
# endif// GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE)
# endif// GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE)
// -- Unary arithmetic operators --

View File

@ -1180,6 +1180,6 @@ namespace glm
}
}//namespace glm
#if GLM_ARCH != GLM_FORCE_PURE && GLM_HAS_ANONYMOUS_UNION && GLM_NOT_BUGGY_VC32BITS
#if GLM_ARCH != GLM_FORCE_PURE
# include "type_vec4_simd.inl"
#endif

View File

@ -32,6 +32,8 @@
namespace glm
{
# if GLM_HAS_UNRESTRICTED_UNIONS
# if !GLM_HAS_DEFAULTED_FUNCTIONS
template <>
GLM_FUNC_QUALIFIER tvec4<float, simd>::tvec4()
@ -90,4 +92,5 @@ namespace glm
Result.data = _mm_mul_ps(v1.data, v2.data);
return Result;
}
#endif//GLM_HAS_UNRESTRICTED_UNIONS
}//namespace glm

169
glm/simd/common.h Normal file
View File

@ -0,0 +1,169 @@
// Silence MSVC warnings 4510, 4512 and 4610 for the union below
// (presumably triggered by its const member -- verify against the MSVC docs).
#if(GLM_COMPILER & GLM_COMPILER_VC)
#pragma warning(push)
#pragma warning(disable : 4510 4512 4610)
#endif
/// Helper whose float member `f` is read to build SIMD constants.
/// NOTE(review): the constructor sets `f` to 0.0; the commented-out initialisers show
/// the bit pattern (mantissa 0x7FFFFF, exponent 0xFF, sign 0) that was apparently intended.
union ieee754_QNAN
{
const float f;
// Declares a nested type only: no data member of type `i` exists, so these
// bit-fields do not overlay `f`.
struct i
{
const unsigned int mantissa:23, exp:8, sign:1;
};
ieee754_QNAN() : f(0.0)/*, mantissa(0x7FFFFF), exp(0xFF), sign(0x0)*/ {}
};
#if(GLM_COMPILER & GLM_COMPILER_VC)
#pragma warning(pop)
#endif
// Shared SSE constants; each float constant holds the same value in all four lanes.
static const __m128 GLM_VAR_USED glm_zero = _mm_setzero_ps();
static const __m128 GLM_VAR_USED glm_one = _mm_set_ps1(1.0f);
static const __m128 GLM_VAR_USED glm_half = _mm_set_ps1(0.5f);
static const __m128 GLM_VAR_USED glm_minus_one = _mm_set_ps1(-1.0f);
static const __m128 GLM_VAR_USED glm_two = _mm_set_ps1(2.0f);
static const __m128 GLM_VAR_USED glm_three = _mm_set_ps1(3.0f);
// Kept so existing references to glm_abs_mask still compile; its float member is 0.0f.
static const ieee754_QNAN glm_abs_mask;
// Every bit set except the IEEE-754 sign bit. Built from the integer pattern because
// glm_abs_mask.f is 0.0f, which would give an all-zero mask.
static const __m128 GLM_VAR_USED glm_abs4_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
// Only the sign bit set in every lane.
static const __m128 GLM_VAR_USED glm_epi32_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(static_cast<int>(0x80000000)));
// 2^23, used by the add/subtract rounding helpers.
static const __m128 GLM_VAR_USED glm_ps_2pow23 = _mm_set_ps1(8388608.0f);
static const __m128 GLM_VAR_USED glm_ps_1 = _mm_set_ps1(1.0f);
/// Per-lane absolute value of x.
/// Clears the sign bit of every lane using the sign-bit mask, so the result does not
/// depend on the contents of glm_abs4_mask.
GLM_FUNC_QUALIFIER __m128 glm_abs_ps(__m128 x)
{
	// andnot(a, b) computes (~a) & b: keep everything except the sign bit.
	return _mm_andnot_ps(glm_epi32_sign_mask, x);
}
/// sign: per lane, -1.0f where x < 0, 1.0f where x > 0, and all bits clear otherwise.
GLM_FUNC_QUALIFIER __m128 glm_sgn_ps(__m128 x)
{
	// Each comparison yields an all-ones/all-zeros lane mask that selects the constant.
	__m128 const negative = _mm_and_ps(_mm_cmplt_ps(x, glm_zero), glm_minus_one);
	__m128 const positive = _mm_and_ps(_mm_cmpgt_ps(x, glm_zero), glm_one);
	return _mm_or_ps(negative, positive);
}
/// round: adds and then subtracts 2^23 carrying the sign of x, per lane.
/// NOTE(review): presumably relies on the default round-to-nearest mode and on
/// |x| < 2^23 -- confirm before using with large magnitudes.
GLM_FUNC_QUALIFIER __m128 glm_rnd_ps(__m128 x)
{
	// 2^23 with the sign bit copied from x.
	__m128 const magic = _mm_or_ps(_mm_and_ps(glm_epi32_sign_mask, x), glm_ps_2pow23);
	return _mm_sub_ps(_mm_add_ps(x, magic), magic);
}
/// floor: rounds with glm_rnd_ps, then subtracts 1 in the lanes where the rounded value is above x.
GLM_FUNC_QUALIFIER __m128 glm_flr_ps(__m128 x)
{
	__m128 const rounded = glm_rnd_ps(x);
	// 1.0f in lanes where x < rounded, 0 elsewhere.
	__m128 const correction = _mm_and_ps(_mm_cmplt_ps(x, rounded), glm_ps_1);
	return _mm_sub_ps(rounded, correction);
}
//trunc
//GLM_FUNC_QUALIFIER __m128 _mm_trc_ps(__m128 v)
//{
// return __m128();
//}
/// roundEven: same add/subtract-2^23 sequence as glm_rnd_ps.
/// NOTE(review): tie handling presumably follows the current rounding mode -- confirm.
GLM_FUNC_QUALIFIER __m128 glm_rde_ps(__m128 x)
{
	// 2^23 with the sign bit copied from x.
	__m128 const magic = _mm_or_ps(_mm_and_ps(glm_epi32_sign_mask, x), glm_ps_2pow23);
	return _mm_sub_ps(_mm_add_ps(x, magic), magic);
}
/// ceil: rounds with glm_rnd_ps, then adds 1 in the lanes where the rounded value is below x.
GLM_FUNC_QUALIFIER __m128 glm_ceil_ps(__m128 x)
{
	__m128 const rounded = glm_rnd_ps(x);
	// 1.0f in lanes where x > rounded, 0 elsewhere.
	__m128 const correction = _mm_and_ps(_mm_cmpgt_ps(x, rounded), glm_ps_1);
	return _mm_add_ps(rounded, correction);
}
/// fract: x - glm_flr_ps(x), per lane.
GLM_FUNC_QUALIFIER __m128 glm_frc_ps(__m128 x)
{
	return _mm_sub_ps(x, glm_flr_ps(x));
}
/// mod: x - y * glm_flr_ps(x / y), per lane.
GLM_FUNC_QUALIFIER __m128 glm_mod_ps(__m128 x, __m128 y)
{
	__m128 const whole = glm_flr_ps(_mm_div_ps(x, y));
	return _mm_sub_ps(x, _mm_mul_ps(y, whole));
}
/// clamp: max(min(v, maxVal), minVal), per lane.
GLM_FUNC_QUALIFIER __m128 glm_clp_ps(__m128 v, __m128 minVal, __m128 maxVal)
{
	// Upper bound first, then lower bound, with the same operand order as before.
	return _mm_max_ps(_mm_min_ps(v, maxVal), minVal);
}
/// mix: v1 * (1 - a) + v2 * a, per lane.
GLM_FUNC_QUALIFIER __m128 glm_mix_ps(__m128 v1, __m128 v2, __m128 a)
{
	__m128 const from_v1 = _mm_mul_ps(v1, _mm_sub_ps(glm_one, a));
	__m128 const from_v2 = _mm_mul_ps(v2, a);
	return _mm_add_ps(from_v1, from_v2);
}
/// step: per lane, 0.0f where x < edge and 1.0f otherwise (GLSL step semantics).
/// Each lane is decided independently; a lane with x == edge yields 1.0f.
GLM_FUNC_QUALIFIER __m128 glm_stp_ps(__m128 edge, __m128 x)
{
	// All-ones mask in lanes where NOT (x < edge).
	__m128 const not_below = _mm_cmpnlt_ps(x, edge);
	// Masking 1.0f gives 1.0f in selected lanes and +0.0f in the others.
	return _mm_and_ps(not_below, glm_one);
}
/// smoothstep: t = clamp((x - edge0) / (edge1 - edge0), 0, 1); returns t * t * (3 - 2 * t), per lane.
/// No guard is applied for edge0 == edge1 (division by zero in that lane).
GLM_FUNC_QUALIFIER __m128 glm_ssp_ps(__m128 edge0, __m128 edge1, __m128 x)
{
	__m128 const sub0 = _mm_sub_ps(x, edge0);
	__m128 const sub1 = _mm_sub_ps(edge1, edge0);
	// Normalise x into the [edge0, edge1] range: this is a division, not a subtraction.
	__m128 const div0 = _mm_div_ps(sub0, sub1);
	__m128 const clp0 = glm_clp_ps(div0, glm_zero, glm_one);
	__m128 const mul0 = _mm_mul_ps(glm_two, clp0);
	__m128 const sub2 = _mm_sub_ps(glm_three, mul0);
	__m128 const mul1 = _mm_mul_ps(clp0, clp0);
	return _mm_mul_ps(mul1, sub2);
}
/// \todo
//GLM_FUNC_QUALIFIER __m128 glm_nan_ps(__m128 x)
//{
// __m128 empty;
// return empty;
//}
/// \todo
//GLM_FUNC_QUALIFIER __m128 glm_inf_ps(__m128 x)
//{
// __m128 empty;
// return empty;
//}
// SSE scalar reciprocal sqrt: rsqrt estimate refined by one Newton-Raphson iteration
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
// Only lane 0 is computed; every operation is the scalar (*_ss) form.
// NOTE(review): despite "sqrt" in the name, lane 0 of the result is
// glm_half * r * (glm_three - x * r * r) with r = rsqrt(x), which (assuming
// glm_half is 0.5 and glm_three is 3) approximates 1 / sqrt(x).
GLM_FUNC_QUALIFIER __m128 glm_sqrt_wip_ss(__m128 x)
{
	__m128 const estimate = _mm_rsqrt_ss(x); // hardware "estimate" opcode
	__m128 const estimate_sq = _mm_mul_ss(estimate, estimate);
	__m128 const correction = _mm_sub_ss(glm_three, _mm_mul_ss(x, estimate_sq));
	__m128 const half_estimate = _mm_mul_ss(glm_half, estimate);
	return _mm_mul_ss(half_estimate, correction);
}

0
glm/simd/exponential.h Normal file
View File

101
glm/simd/geometric.h Normal file
View File

@ -0,0 +1,101 @@
#pragma once
#include "common.h"
/// Four-component dot product of v1 and v2, with the sum replicated into
/// every lane of the result.
GLM_FUNC_QUALIFIER __m128 glm_dot_ps(__m128 v1, __m128 v2)
{
# if GLM_ARCH & GLM_ARCH_AVX
	// Single-instruction path: multiply all four lanes, broadcast the sum.
	return _mm_dp_ps(v1, v2, 0xff);
# else
	__m128 const prod = _mm_mul_ps(v1, v2);
	// Add each lane to its neighbour within the pair (0<->1, 2<->3).
	__m128 const pair_sum = _mm_add_ps(prod, _mm_shuffle_ps(prod, prod, _MM_SHUFFLE(2, 3, 0, 1)));
	// Add the reversed vector so every lane ends up holding both pair sums.
	return _mm_add_ps(pair_sum, _mm_shuffle_ps(pair_sum, pair_sum, _MM_SHUFFLE(0, 1, 2, 3)));
# endif
}
/// Four-component dot product of v1 and v2 delivered in lane 0 only; the
/// upper lanes hold intermediate partial sums and should not be relied upon.
GLM_FUNC_QUALIFIER __m128 glm_dot_ss(__m128 v1, __m128 v2)
{
	__m128 const prod = _mm_mul_ps(v1, v2);
	// Bring lanes 2 and 3 down to lanes 0 and 1.
	__m128 const upper = _mm_movehl_ps(prod, prod);
	__m128 const partial = _mm_add_ps(upper, prod);
	// Move lane 1 of the partial sums into lane 0 (immediate value 1).
	__m128 const lane1 = _mm_shuffle_ps(partial, partial, _MM_SHUFFLE(0, 0, 0, 1));
	return _mm_add_ss(partial, lane1);
}
/// Length of x: square root of glm_dot_ps(x, x), taken in every lane.
GLM_FUNC_QUALIFIER __m128 glm_len_ps(__m128 x)
{
	__m128 const squared_length = glm_dot_ps(x, x);
	return _mm_sqrt_ps(squared_length);
}
/// Distance between p0 and p1: glm_len_ps of their per-lane difference.
GLM_FUNC_QUALIFIER __m128 glm_dst_ps(__m128 p0, __m128 p1)
{
	__m128 const delta = _mm_sub_ps(p0, p1);
	return glm_len_ps(delta);
}
/// Cross product of the first three lanes of v1 and v2.
/// Lane 3 evaluates to v1.w * v2.w - v1.w * v2.w.
GLM_FUNC_QUALIFIER __m128 glm_xpd_ps(__m128 v1, __m128 v2)
{
	// Rotated copies of both operands: (y, z, x, w) and (z, x, y, w).
	__m128 const a_yzx = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
	__m128 const a_zxy = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
	__m128 const b_yzx = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
	__m128 const b_zxy = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
	// a.yzx * b.zxy - a.zxy * b.yzx
	return _mm_sub_ps(_mm_mul_ps(a_yzx, b_zxy), _mm_mul_ps(a_zxy, b_yzx));
}
/// Scales v by the reciprocal square root of glm_dot_ps(v, v).
/// The reciprocal square root comes from _mm_rsqrt_ps, which is an
/// approximation, so the result is only approximately unit length.
GLM_FUNC_QUALIFIER __m128 glm_nrm_ps(__m128 v)
{
	__m128 const squared_length = glm_dot_ps(v, v);
	__m128 const inv_length = _mm_rsqrt_ps(squared_length);
	return _mm_mul_ps(v, inv_length);
}
/// Face-forward: returns N multiplied by glm_sgn_ps(dot(Nref, I)) * glm_minus_one.
/// glm_sgn_ps and glm_minus_one are defined outside this function; presumably
/// they are a per-lane sign function and -1.0f, which makes the result N when
/// the dot product is negative and -N when it is positive -- TODO confirm.
GLM_FUNC_QUALIFIER __m128 glm_ffd_ps(__m128 N, __m128 I, __m128 Nref)
{
	__m128 const facing = glm_dot_ps(Nref, I);
	__m128 const sign = glm_sgn_ps(facing);
	__m128 const flip = _mm_mul_ps(sign, glm_minus_one);
	return _mm_mul_ps(N, flip);
}
/// Reflection of I about N: I - (N * dot(N, I)) * glm_two.
/// glm_two is defined outside this function (presumably 2.0f in every lane).
GLM_FUNC_QUALIFIER __m128 glm_rfe_ps(__m128 I, __m128 N)
{
	__m128 const n_dot_i = glm_dot_ps(N, I);
	__m128 const projected = _mm_mul_ps(N, n_dot_i);
	__m128 const doubled = _mm_mul_ps(projected, glm_two);
	return _mm_sub_ps(I, doubled);
}
/// Refraction of the incident vector I through a surface with normal N and
/// ratio of refraction indices eta, following the GLSL definition:
///   k = 1 - eta^2 * (1 - dot(N, I)^2)
///   k < 0  -> glm_zero (total internal reflection)
///   else   -> eta * I - (eta * dot(N, I) + sqrt(k)) * N
/// glm_zero / glm_one are defined outside this function (presumably 0.0f and
/// 1.0f in every lane). eta is presumably the same value replicated in every
/// lane; only lane 0 of k is inspected for the early-out.
GLM_FUNC_QUALIFIER __m128 glm_rfa_ps(__m128 I, __m128 N, __m128 eta)
{
	__m128 const dot0 = glm_dot_ps(N, I);
	__m128 const mul0 = _mm_mul_ps(eta, eta);
	__m128 const mul1 = _mm_mul_ps(dot0, dot0);
	__m128 const sub0 = _mm_sub_ps(glm_one, mul1);  // 1 - dot^2
	__m128 const mul2 = _mm_mul_ps(mul0, sub0);     // eta^2 * (1 - dot^2)
	__m128 const k = _mm_sub_ps(glm_one, mul2);     // 1 - eta^2 * (1 - dot^2)
	// _mm_cmplt_ss only compares lane 0 and passes lanes 1-3 of k through
	// unchanged, so just bit 0 of the movemask is meaningful here.
	if((_mm_movemask_ps(_mm_cmplt_ss(k, glm_zero)) & 1) != 0)
		return glm_zero;
	__m128 const sqt0 = _mm_sqrt_ps(k);
	__m128 const mul3 = _mm_mul_ps(eta, dot0);
	__m128 const add0 = _mm_add_ps(mul3, sqt0);
	__m128 const mul4 = _mm_mul_ps(add0, N);
	__m128 const mul5 = _mm_mul_ps(eta, I);
	__m128 const sub2 = _mm_sub_ps(mul5, mul4);
	return sub2;
}

0
glm/simd/integer.h Normal file
View File

1034
glm/simd/matrix.h Normal file

File diff suppressed because it is too large Load Diff

0
glm/simd/packing.h Normal file
View File

280
glm/simd/platform.h Normal file
View File

@ -0,0 +1,280 @@
/// @ref simd
/// @file glm/simd/platform.h
#pragma once
///////////////////////////////////////////////////////////////////////////////////
// Platform
// Platform identifiers. Every known platform is a distinct single bit, so a
// value can be tested with a bitwise AND; GLM_PLATFORM_UNKNOWN is 0 and thus
// cannot be tested that way.
#define GLM_PLATFORM_UNKNOWN 0x00000000
#define GLM_PLATFORM_WINDOWS 0x00010000
#define GLM_PLATFORM_LINUX 0x00020000
#define GLM_PLATFORM_APPLE 0x00040000
// iOS is currently disabled (bit 0x00080000 is left unused).
//#define GLM_PLATFORM_IOS 0x00080000
#define GLM_PLATFORM_ANDROID 0x00100000
#define GLM_PLATFORM_CHROME_NACL 0x00200000
#define GLM_PLATFORM_UNIX 0x00400000
#define GLM_PLATFORM_QNXNTO 0x00800000
#define GLM_PLATFORM_WINCE 0x01000000
#define GLM_PLATFORM_CYGWIN 0x02000000
// Platform detection: GLM_PLATFORM is set by the first matching branch, so
// the order of the tests below is significant. Defining
// GLM_FORCE_PLATFORM_UNKNOWN bypasses detection entirely.
#ifdef GLM_FORCE_PLATFORM_UNKNOWN
# define GLM_PLATFORM GLM_PLATFORM_UNKNOWN
#elif defined(__CYGWIN__)
# define GLM_PLATFORM GLM_PLATFORM_CYGWIN
#elif defined(__QNXNTO__)
# define GLM_PLATFORM GLM_PLATFORM_QNXNTO
#elif defined(__APPLE__)
# define GLM_PLATFORM GLM_PLATFORM_APPLE
#elif defined(WINCE)
# define GLM_PLATFORM GLM_PLATFORM_WINCE
#elif defined(_WIN32)
# define GLM_PLATFORM GLM_PLATFORM_WINDOWS
#elif defined(__native_client__)
# define GLM_PLATFORM GLM_PLATFORM_CHROME_NACL
// NOTE(review): Android is tested before __linux, presumably because Android
// toolchains define both macros -- confirm before reordering.
#elif defined(__ANDROID__)
# define GLM_PLATFORM GLM_PLATFORM_ANDROID
#elif defined(__linux)
# define GLM_PLATFORM GLM_PLATFORM_LINUX
#elif defined(__unix)
# define GLM_PLATFORM GLM_PLATFORM_UNIX
#else
# define GLM_PLATFORM GLM_PLATFORM_UNKNOWN
#endif//GLM_PLATFORM detection
// Report platform detection
// When GLM_MESSAGES is defined, emit a single compile-time message naming the
// detected platform. GLM_MESSAGE_PLATFORM_DISPLAYED guards against printing
// the message more than once per translation unit.
#if(defined(GLM_MESSAGES) && !defined(GLM_MESSAGE_PLATFORM_DISPLAYED))
# define GLM_MESSAGE_PLATFORM_DISPLAYED
# if(GLM_PLATFORM & GLM_PLATFORM_CYGWIN)
# pragma message("GLM: Cygwin platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_QNXNTO)
# pragma message("GLM: QNX platform detected")
//# elif(GLM_PLATFORM & GLM_PLATFORM_IOS)
//# pragma message("GLM: iOS platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_APPLE)
# pragma message("GLM: Apple platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_WINCE)
# pragma message("GLM: WinCE platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_WINDOWS)
# pragma message("GLM: Windows platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_CHROME_NACL)
# pragma message("GLM: Native Client detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_ANDROID)
# pragma message("GLM: Android platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_LINUX)
# pragma message("GLM: Linux platform detected")
# elif(GLM_PLATFORM & GLM_PLATFORM_UNIX)
# pragma message("GLM: UNIX platform detected")
// GLM_PLATFORM_UNKNOWN is 0, so it has to be compared for equality: a bitwise
// AND with 0 is never true.
# elif(GLM_PLATFORM == GLM_PLATFORM_UNKNOWN)
# pragma message("GLM: platform unknown")
# else
# pragma message("GLM: platform not detected")
# endif
#endif//GLM_MESSAGES
///////////////////////////////////////////////////////////////////////////////////
// Compiler
// Compiler identifiers. Each compiler family owns one high bit (for example
// 0x02000000 for GCC); a versioned identifier is the family value plus a
// version-specific increment in the low bits, growing with newer releases.
#define GLM_COMPILER_UNKNOWN 0x00000000
// Intel
#define GLM_COMPILER_INTEL 0x00100000
#define GLM_COMPILER_INTEL12 0x00100010
#define GLM_COMPILER_INTEL12_1 0x00100020
#define GLM_COMPILER_INTEL13 0x00100030
#define GLM_COMPILER_INTEL14 0x00100040
#define GLM_COMPILER_INTEL15 0x00100050
#define GLM_COMPILER_INTEL16 0x00100060
// Visual C++ defines
#define GLM_COMPILER_VC 0x01000000
#define GLM_COMPILER_VC2010 0x01000090
#define GLM_COMPILER_VC2012 0x010000A0
#define GLM_COMPILER_VC2013 0x010000B0
#define GLM_COMPILER_VC2015 0x010000C0
// GCC defines
#define GLM_COMPILER_GCC 0x02000000
#define GLM_COMPILER_GCC44 0x020000B0
#define GLM_COMPILER_GCC45 0x020000C0
#define GLM_COMPILER_GCC46 0x020000D0
#define GLM_COMPILER_GCC47 0x020000E0
#define GLM_COMPILER_GCC48 0x020000F0
#define GLM_COMPILER_GCC49 0x02000100
#define GLM_COMPILER_GCC50 0x02000200
#define GLM_COMPILER_GCC51 0x02000300
#define GLM_COMPILER_GCC52 0x02000400
#define GLM_COMPILER_GCC53 0x02000500
#define GLM_COMPILER_GCC60 0x02000600
// CUDA
#define GLM_COMPILER_CUDA 0x10000000
#define GLM_COMPILER_CUDA40 0x10000040
#define GLM_COMPILER_CUDA41 0x10000050
#define GLM_COMPILER_CUDA42 0x10000060
#define GLM_COMPILER_CUDA50 0x10000070
#define GLM_COMPILER_CUDA60 0x10000080
#define GLM_COMPILER_CUDA65 0x10000090
#define GLM_COMPILER_CUDA70 0x100000A0
#define GLM_COMPILER_CUDA75 0x100000B0
// LLVM
#define GLM_COMPILER_LLVM 0x20000000
#define GLM_COMPILER_LLVM32 0x20000030
#define GLM_COMPILER_LLVM33 0x20000040
#define GLM_COMPILER_LLVM34 0x20000050
#define GLM_COMPILER_LLVM35 0x20000060
#define GLM_COMPILER_LLVM36 0x20000070
#define GLM_COMPILER_LLVM37 0x20000080
#define GLM_COMPILER_LLVM38 0x20000090
#define GLM_COMPILER_LLVM39 0x200000A0
// Apple Clang
#define GLM_COMPILER_APPLE_CLANG 0x40000000
#define GLM_COMPILER_APPLE_CLANG40 0x40000010
#define GLM_COMPILER_APPLE_CLANG41 0x40000020
#define GLM_COMPILER_APPLE_CLANG42 0x40000030
#define GLM_COMPILER_APPLE_CLANG50 0x40000040
#define GLM_COMPILER_APPLE_CLANG51 0x40000050
#define GLM_COMPILER_APPLE_CLANG60 0x40000060
#define GLM_COMPILER_APPLE_CLANG61 0x40000070
// Build model
// NOTE(review): these values overlap numerically with the version increments
// above, so they are presumably meant for a separate macro rather than for
// GLM_COMPILER -- confirm at the point of use.
#define GLM_MODEL_32 0x00000010
#define GLM_MODEL_64 0x00000020
// Compiler detection: defines GLM_COMPILER as one of the GLM_COMPILER_*
// identifiers. The first matching branch wins, so the order of the tests is
// significant. Within a family, versions without a dedicated identifier are
// reported through the generic family value.
// Force generic C++ compiler
#ifdef GLM_FORCE_COMPILER_UNKNOWN
# define GLM_COMPILER GLM_COMPILER_UNKNOWN
// Intel
#elif defined(__INTEL_COMPILER)
# if __INTEL_COMPILER == 1200
# define GLM_COMPILER GLM_COMPILER_INTEL12
# elif __INTEL_COMPILER == 1210
# define GLM_COMPILER GLM_COMPILER_INTEL12_1
# elif __INTEL_COMPILER == 1300
# define GLM_COMPILER GLM_COMPILER_INTEL13
# elif __INTEL_COMPILER == 1400
# define GLM_COMPILER GLM_COMPILER_INTEL14
# elif __INTEL_COMPILER == 1500
# define GLM_COMPILER GLM_COMPILER_INTEL15
# elif __INTEL_COMPILER >= 1600
# define GLM_COMPILER GLM_COMPILER_INTEL16
# else
# define GLM_COMPILER GLM_COMPILER_INTEL
# endif
// CUDA
#elif defined(__CUDACC__)
# if !defined(CUDA_VERSION) && !defined(GLM_FORCE_CUDA)
# include <cuda.h> // make sure version is defined since nvcc does not define it itself!
# endif
# if CUDA_VERSION < 3000
# error "GLM requires CUDA 3.0 or higher"
# else
# define GLM_COMPILER GLM_COMPILER_CUDA
# endif
// Visual C++
#elif defined(_MSC_VER)
# if _MSC_VER < 1600
# error "GLM requires Visual C++ 2010 or higher"
# elif _MSC_VER == 1600
# define GLM_COMPILER GLM_COMPILER_VC2010
# elif _MSC_VER == 1700
# define GLM_COMPILER GLM_COMPILER_VC2012
# elif _MSC_VER == 1800
# define GLM_COMPILER GLM_COMPILER_VC2013
# elif _MSC_VER >= 1900
# define GLM_COMPILER GLM_COMPILER_VC2015
# else//_MSC_VER
# define GLM_COMPILER GLM_COMPILER_VC
# endif//_MSC_VER
// Clang
#elif defined(__clang__)
# if GLM_PLATFORM & GLM_PLATFORM_APPLE
# if __clang_major__ == 4 && __clang_minor__ == 0
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG40
# elif __clang_major__ == 4 && __clang_minor__ == 1
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG41
# elif __clang_major__ == 4 && __clang_minor__ == 2
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG42
# elif __clang_major__ == 5 && __clang_minor__ == 0
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG50
# elif __clang_major__ == 5 && __clang_minor__ == 1
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG51
# elif __clang_major__ == 6 && __clang_minor__ == 0
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG60
# elif __clang_major__ == 6 && __clang_minor__ >= 1
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG61
# elif __clang_major__ >= 7
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG61
# else
# define GLM_COMPILER GLM_COMPILER_APPLE_CLANG
# endif
# else
// Clang releases older than 3.2 have no dedicated GLM_COMPILER_LLVM3x
// identifier; they are reported through the generic GLM_COMPILER_LLVM
// fallback at the end of this chain instead of an undefined macro.
# if __clang_major__ == 3 && __clang_minor__ == 2
# define GLM_COMPILER GLM_COMPILER_LLVM32
# elif __clang_major__ == 3 && __clang_minor__ == 3
# define GLM_COMPILER GLM_COMPILER_LLVM33
# elif __clang_major__ == 3 && __clang_minor__ == 4
# define GLM_COMPILER GLM_COMPILER_LLVM34
# elif __clang_major__ == 3 && __clang_minor__ == 5
# define GLM_COMPILER GLM_COMPILER_LLVM35
# elif __clang_major__ == 3 && __clang_minor__ == 6
# define GLM_COMPILER GLM_COMPILER_LLVM36
# elif __clang_major__ == 3 && __clang_minor__ == 7
# define GLM_COMPILER GLM_COMPILER_LLVM37
# elif __clang_major__ == 3 && __clang_minor__ == 8
# define GLM_COMPILER GLM_COMPILER_LLVM38
# elif __clang_major__ == 3 && __clang_minor__ >= 9
# define GLM_COMPILER GLM_COMPILER_LLVM39
# elif __clang_major__ >= 4
# define GLM_COMPILER GLM_COMPILER_LLVM39
# else
# define GLM_COMPILER GLM_COMPILER_LLVM
# endif
# endif
// G++
#elif defined(__GNUC__) || defined(__MINGW32__)
// GCC releases older than 4.4 have no dedicated GLM_COMPILER_GCC4x
// identifier; they are reported through the generic GLM_COMPILER_GCC
// fallback at the end of this chain instead of an undefined macro.
# if (__GNUC__ == 4) && (__GNUC_MINOR__ == 4)
# define GLM_COMPILER (GLM_COMPILER_GCC44)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 5)
# define GLM_COMPILER (GLM_COMPILER_GCC45)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 6)
# define GLM_COMPILER (GLM_COMPILER_GCC46)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 7)
# define GLM_COMPILER (GLM_COMPILER_GCC47)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 8)
# define GLM_COMPILER (GLM_COMPILER_GCC48)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ >= 9)
# define GLM_COMPILER (GLM_COMPILER_GCC49)
# elif (__GNUC__ == 5) && (__GNUC_MINOR__ == 0)
# define GLM_COMPILER (GLM_COMPILER_GCC50)
# elif (__GNUC__ == 5) && (__GNUC_MINOR__ == 1)
# define GLM_COMPILER (GLM_COMPILER_GCC51)
# elif (__GNUC__ == 5) && (__GNUC_MINOR__ == 2)
# define GLM_COMPILER (GLM_COMPILER_GCC52)
# elif (__GNUC__ == 5) && (__GNUC_MINOR__ >= 3)
# define GLM_COMPILER (GLM_COMPILER_GCC53)
# elif (__GNUC__ >= 6)
# define GLM_COMPILER (GLM_COMPILER_GCC60)
# else
# define GLM_COMPILER (GLM_COMPILER_GCC)
# endif
#else
# define GLM_COMPILER GLM_COMPILER_UNKNOWN
#endif
// Safety net: every branch above defines GLM_COMPILER, so this only fires if
// the chain is edited in a way that leaves a path without a definition.
#ifndef GLM_COMPILER
# error "GLM_COMPILER undefined, your compiler may not be supported by GLM. Add #define GLM_COMPILER 0 to ignore this message."
#endif//GLM_COMPILER

0
glm/simd/trigonometric.h Normal file
View File

View File

View File

@ -96,7 +96,7 @@ int test_vec1_ctor()
}
#endif
*/
#if GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE)
#if GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE)
{
glm::vec2 A = glm::vec2(1.0f, 2.0f);
glm::vec2 B = A.xy;
@ -107,7 +107,7 @@ int test_vec1_ctor()
Error += glm::all(glm::equal(A, C)) ? 0 : 1;
Error += glm::all(glm::equal(A, D)) ? 0 : 1;
}
#endif// GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE)
#endif//GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE)
{
glm::vec2 A = glm::vec2(2.0f);

View File

@ -262,7 +262,7 @@ int test_vec2_ctor()
}
#endif
#if GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE)
#if GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE)
{
glm::vec2 A = glm::vec2(1.0f, 2.0f);
glm::vec2 B = A.xy;
@ -273,7 +273,7 @@ int test_vec2_ctor()
Error += glm::all(glm::equal(A, C)) ? 0 : 1;
Error += glm::all(glm::equal(A, D)) ? 0 : 1;
}
#endif// GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE)
#endif//GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE)
{
glm::vec2 A = glm::vec2(2.0f);

View File

@ -71,7 +71,7 @@ int test_vec3_ctor()
}
#endif
#if(GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE))
#if(GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE))
{
glm::vec3 A = glm::vec3(1.0f, 2.0f, 3.0f);
glm::vec3 B = A.xyz;
@ -90,7 +90,7 @@ int test_vec3_ctor()
Error += glm::all(glm::equal(A, G)) ? 0 : 1;
Error += glm::all(glm::equal(A, H)) ? 0 : 1;
}
#endif//(GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE))
#endif//(GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE))
{
glm::vec3 A(1);

View File

@ -98,7 +98,7 @@ int test_vec4_ctor()
}
#endif
#if GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE)
#if GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE)
{
glm::vec4 A = glm::vec4(1.0f, 2.0f, 3.0f, 4.0f);
glm::vec4 B = A.xyzw;
@ -127,7 +127,7 @@ int test_vec4_ctor()
Error += glm::all(glm::equal(A, L)) ? 0 : 1;
Error += glm::all(glm::equal(A, M)) ? 0 : 1;
}
#endif// GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE)
#endif// GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE)
{
glm::vec4 A(1);
@ -334,7 +334,7 @@ int test_vec4_swizzle_partial()
glm::vec4 A(1, 2, 3, 4);
# if GLM_HAS_ANONYMOUS_UNION && defined(GLM_SWIZZLE_RELAX)
# if GLM_HAS_UNRESTRICTED_UNIONS && defined(GLM_SWIZZLE_RELAX)
{
glm::vec4 B(A.xy, A.zw);
Error += A == B ? 0 : 1;