Updated SIMD implementation
This commit is contained in:
parent
86328415fa
commit
c2792febb2
@ -122,6 +122,218 @@ namespace glm
|
||||
detail::tvec4<float> vec4_cast(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns x if x >= 0; otherwise, it returns -x.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD abs(detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD sign(detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns a value equal to the nearest integer that is less than or equal to x.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD floor(detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns a value equal to the nearest integer to x
|
||||
//! whose absolute value is not larger than the absolute value of x.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD trunc(detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns a value equal to the nearest integer to x.
|
||||
//! The fraction 0.5 will round in a direction chosen by the
|
||||
//! implementation, presumably the direction that is fastest.
|
||||
//! This includes the possibility that round(x) returns the
|
||||
//! same value as roundEven(x) for all values of x.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD round(detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns a value equal to the nearest integer to x.
|
||||
//! A fractional part of 0.5 will round toward the nearest even
|
||||
//! integer. (Both 3.5 and 4.5 for x will return 4.0.)
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
//detail::fvec4SIMD roundEven(detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns a value equal to the nearest integer
|
||||
//! that is greater than or equal to x.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD ceil(detail::fvec4SIMD const & x);
|
||||
|
||||
//! Return x - floor(x).
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD fract(detail::fvec4SIMD const & x);
|
||||
|
||||
//! Modulus. Returns x - y * floor(x / y)
|
||||
//! for each component in x using the floating point value y.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD mod(
|
||||
detail::fvec4SIMD const & x,
|
||||
detail::fvec4SIMD const & y);
|
||||
|
||||
//! Modulus. Returns x - y * floor(x / y)
|
||||
//! for each component in x using the floating point value y.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD mod(
|
||||
detail::fvec4SIMD const & x,
|
||||
float const & y);
|
||||
|
||||
//! Returns the fractional part of x and sets i to the integer
|
||||
//! part (as a whole number floating point value). Both the
|
||||
//! return value and the output parameter will have the same
|
||||
//! sign as x.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
//detail::fvec4SIMD modf(
|
||||
// detail::fvec4SIMD const & x,
|
||||
// detail::fvec4SIMD & i);
|
||||
|
||||
//! Returns y if y < x; otherwise, it returns x.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD min(
|
||||
detail::fvec4SIMD const & x,
|
||||
detail::fvec4SIMD const & y);
|
||||
|
||||
detail::fvec4SIMD min(
|
||||
detail::fvec4SIMD const & x,
|
||||
float const & y);
|
||||
|
||||
//! Returns y if x < y; otherwise, it returns x.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD max(
|
||||
detail::fvec4SIMD const & x,
|
||||
detail::fvec4SIMD const & y);
|
||||
|
||||
detail::fvec4SIMD max(
|
||||
detail::fvec4SIMD const & x,
|
||||
float const & y);
|
||||
|
||||
//! Returns min(max(x, minVal), maxVal) for each component in x
|
||||
//! using the floating-point values minVal and maxVal.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD clamp(
|
||||
detail::fvec4SIMD const & x,
|
||||
detail::fvec4SIMD const & minVal,
|
||||
detail::fvec4SIMD const & maxVal);
|
||||
|
||||
detail::fvec4SIMD clamp(
|
||||
detail::fvec4SIMD const & x,
|
||||
float const & minVal,
|
||||
float const & maxVal);
|
||||
|
||||
//! \return If genTypeU is a floating scalar or vector:
|
||||
//! Returns x * (1.0 - a) + y * a, i.e., the linear blend of
|
||||
//! x and y using the floating-point value a.
|
||||
//! The value for a is not restricted to the range [0, 1].
|
||||
//!
|
||||
//! \return If genTypeU is a boolean scalar or vector:
|
||||
//! Selects which vector each returned component comes
|
||||
//! from. For a component of a that is false, the
|
||||
//! corresponding component of x is returned. For a
|
||||
//! component of a that is true, the corresponding
|
||||
//! component of y is returned. Components of x and y that
|
||||
//! are not selected are allowed to be invalid floating point
|
||||
//! values and will have no effect on the results. Thus, this
|
||||
//! provides different functionality than
|
||||
//! genType mix(genType x, genType y, genType(a))
|
||||
//! where a is a Boolean vector.
|
||||
//!
|
||||
//! From GLSL 1.30.08 specification, section 8.3
|
||||
//!
|
||||
//! \param[in] x Floating point scalar or vector.
|
||||
//! \param[in] y Floating point scalar or vector.
|
||||
//! \param[in] a Floating point or boolean scalar or vector.
|
||||
//!
|
||||
// \todo Test when 'a' is a boolean.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD mix(
|
||||
detail::fvec4SIMD const & x,
|
||||
detail::fvec4SIMD const & y,
|
||||
detail::fvec4SIMD const & a);
|
||||
|
||||
//! Returns 0.0 if x < edge, otherwise it returns 1.0.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD step(
|
||||
detail::fvec4SIMD const & edge,
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
detail::fvec4SIMD step(
|
||||
float const & edge,
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and
|
||||
//! performs smooth Hermite interpolation between 0 and 1
|
||||
//! when edge0 < x < edge1. This is useful in cases where
|
||||
//! you would want a threshold function with a smooth
|
||||
//! transition. This is equivalent to:
|
||||
//! genType t;
|
||||
//! t = clamp ((x - edge0) / (edge1 - edge0), 0, 1);
|
||||
//! return t * t * (3 - 2 * t);
|
||||
//! Results are undefined if edge0 >= edge1.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD smoothstep(
|
||||
detail::fvec4SIMD const & edge0,
|
||||
detail::fvec4SIMD const & edge1,
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
detail::fvec4SIMD smoothstep(
|
||||
float const & edge0,
|
||||
float const & edge1,
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns true if x holds a NaN (not a number)
|
||||
//! representation in the underlying implementation's set of
|
||||
//! floating point representations. Returns false otherwise,
|
||||
//! including for implementations with no NaN
|
||||
//! representations.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
//bvec4 isnan(detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns true if x holds a positive infinity or negative
|
||||
//! infinity representation in the underlying implementation's
|
||||
//! set of floating point representations. Returns false
|
||||
//! otherwise, including for implementations with no infinity
|
||||
//! representations.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
//bvec4 isinf(detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns a signed or unsigned integer value representing
|
||||
//! the encoding of a floating-point value. The floating-point
|
||||
//! value's bit-level representation is preserved.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
//detail::ivec4SIMD floatBitsToInt(detail::fvec4SIMD const & value);
|
||||
|
||||
//! Returns a floating-point value corresponding to a signed
|
||||
//! or unsigned integer encoding of a floating-point value.
|
||||
//! If an inf or NaN is passed in, it will not signal, and the
|
||||
//! resulting floating point value is unspecified. Otherwise,
|
||||
//! the bit-level representation is preserved.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
//detail::fvec4SIMD intBitsToFloat(detail::ivec4SIMD const & value);
|
||||
|
||||
//! Computes and returns a * b + c.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
detail::fvec4SIMD fma(
|
||||
detail::fvec4SIMD const & a,
|
||||
detail::fvec4SIMD const & b,
|
||||
detail::fvec4SIMD const & c);
|
||||
|
||||
//! Splits x into a floating-point significand in the range
|
||||
//! [0.5, 1.0) and an integral exponent of two, such that:
|
||||
//! x = significand * exp(2, exponent)
|
||||
//! The significand is returned by the function and the
|
||||
//! exponent is returned in the parameter exp. For a
|
||||
//! floating-point value of zero, the significand and exponent
|
||||
//! are both zero. For a floating-point value that is an
|
||||
//! infinity or is not a number, the results are undefined.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
//detail::fvec4SIMD frexp(detail::fvec4SIMD const & x, detail::ivec4SIMD & exp);
|
||||
|
||||
//! Builds a floating-point number from x and the
|
||||
//! corresponding integral exponent of two in exp, returning:
|
||||
//! significand * exp(2, exponent)
|
||||
//! If this product is too large to be represented in the
|
||||
//! floating-point type, the result is undefined.
|
||||
//! (From GLM_GTX_simd_vec4 extension, common function)
|
||||
//detail::fvec4SIMD ldexp(detail::fvec4SIMD const & x, detail::ivec4SIMD const & exp);
|
||||
|
||||
//! Returns the length of x, i.e., sqrt(x * x).
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
float simdLength(
|
||||
|
@ -280,6 +280,241 @@ namespace glm
|
||||
return Result;
|
||||
}
|
||||
|
||||
//! Component-wise absolute value of x.
//! Forwards x.Data to detail::sse_abs_ps and returns its result.
//! Marked inline like the other definitions in this file so that
//! including it from several translation units does not produce
//! multiple-definition link errors.
inline detail::fvec4SIMD abs
(
    detail::fvec4SIMD const & x
)
{
    return detail::sse_abs_ps(x.Data);
}
|
||||
|
||||
//! Component-wise sign of x.
//! Forwards x.Data to detail::sse_sgn_ps and wraps the result.
inline detail::fvec4SIMD sign(detail::fvec4SIMD const & x)
{
    detail::fvec4SIMD const Result(detail::sse_sgn_ps(x.Data));
    return Result;
}
|
||||
|
||||
//! Component-wise floor of x.
//! Forwards x.Data to detail::sse_flr_ps and wraps the result.
inline detail::fvec4SIMD floor(detail::fvec4SIMD const & x)
{
    detail::fvec4SIMD const Result(detail::sse_flr_ps(x.Data));
    return Result;
}
|
||||
|
||||
//! Component-wise truncation toward zero: the nearest integer to x
//! whose absolute value is not larger than the absolute value of x.
//!
//! The magnitude is computed as floor(abs(x)); the sign bit of each
//! component of x is then copied back onto the result so that negative
//! inputs stay negative (e.g. -1.5 yields -1.0 rather than +1.0).
inline detail::fvec4SIMD trunc
(
    detail::fvec4SIMD const & x
)
{
    // -0.0f has only the IEEE-754 sign bit set, so it serves as a sign mask.
    __m128 const SignMask = _mm_set1_ps(-0.0f);
    __m128 const SignBits = _mm_and_ps(x.Data, SignMask);
    __m128 const Magnitude = detail::sse_flr_ps(detail::sse_abs_ps(x.Data));
    return _mm_or_ps(Magnitude, SignBits);
}
|
||||
|
||||
//! Component-wise rounding of x to the nearest integer.
//! Forwards x.Data to detail::sse_rnd_ps and wraps the result.
inline detail::fvec4SIMD round(detail::fvec4SIMD const & x)
{
    detail::fvec4SIMD const Result(detail::sse_rnd_ps(x.Data));
    return Result;
}
|
||||
|
||||
//inline detail::fvec4SIMD roundEven
|
||||
//(
|
||||
// detail::fvec4SIMD const & x
|
||||
//)
|
||||
//{
|
||||
|
||||
//}
|
||||
|
||||
//! Component-wise ceiling of x.
//! Forwards x.Data to detail::sse_ceil_ps and wraps the result.
inline detail::fvec4SIMD ceil(detail::fvec4SIMD const & x)
{
    detail::fvec4SIMD const Result(detail::sse_ceil_ps(x.Data));
    return Result;
}
|
||||
|
||||
//! Component-wise fractional part of x.
//! Forwards x.Data to detail::sse_frc_ps and wraps the result.
inline detail::fvec4SIMD fract(detail::fvec4SIMD const & x)
{
    detail::fvec4SIMD const Result(detail::sse_frc_ps(x.Data));
    return Result;
}
|
||||
|
||||
//! Component-wise modulus of x by the vector y.
//! Forwards x.Data and y.Data to detail::sse_mod_ps and wraps the result.
inline detail::fvec4SIMD mod(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y)
{
    detail::fvec4SIMD const Result(detail::sse_mod_ps(x.Data, y.Data));
    return Result;
}
|
||||
|
||||
//! Component-wise modulus of x by the scalar y.
//! y is broadcast to all four lanes with _mm_set1_ps before being
//! passed, together with x.Data, to detail::sse_mod_ps.
inline detail::fvec4SIMD mod(detail::fvec4SIMD const & x, float const & y)
{
    detail::fvec4SIMD const Result(detail::sse_mod_ps(x.Data, _mm_set1_ps(y)));
    return Result;
}
|
||||
|
||||
//inline detail::fvec4SIMD modf
|
||||
//(
|
||||
// detail::fvec4SIMD const & x,
|
||||
// detail::fvec4SIMD & i
|
||||
//)
|
||||
//{
|
||||
|
||||
//}
|
||||
|
||||
//! Component-wise minimum of x and y, computed with _mm_min_ps(x, y).
inline detail::fvec4SIMD min(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y)
{
    __m128 const Lowest = _mm_min_ps(x.Data, y.Data);
    return Lowest;
}
|
||||
|
||||
//! Component-wise minimum of x and the scalar y.
//! y is broadcast to all four lanes with _mm_set1_ps, then combined
//! with x through _mm_min_ps(x, y).
inline detail::fvec4SIMD min(detail::fvec4SIMD const & x, float const & y)
{
    __m128 const Lowest = _mm_min_ps(x.Data, _mm_set1_ps(y));
    return Lowest;
}
|
||||
|
||||
//! Component-wise maximum of x and y, computed with _mm_max_ps(x, y).
inline detail::fvec4SIMD max(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y)
{
    __m128 const Highest = _mm_max_ps(x.Data, y.Data);
    return Highest;
}
|
||||
|
||||
//! Component-wise maximum of x and the scalar y.
//! y is broadcast to all four lanes with _mm_set1_ps, then combined
//! with x through _mm_max_ps(x, y).
inline detail::fvec4SIMD max(detail::fvec4SIMD const & x, float const & y)
{
    __m128 const Highest = _mm_max_ps(x.Data, _mm_set1_ps(y));
    return Highest;
}
|
||||
|
||||
//! Component-wise clamp of x to the range given by the vectors
//! minVal and maxVal.
//! Forwards the three Data members to detail::sse_clp_ps and wraps
//! the result.
inline detail::fvec4SIMD clamp(
    detail::fvec4SIMD const & x,
    detail::fvec4SIMD const & minVal,
    detail::fvec4SIMD const & maxVal)
{
    detail::fvec4SIMD const Result(detail::sse_clp_ps(x.Data, minVal.Data, maxVal.Data));
    return Result;
}
|
||||
|
||||
//! Component-wise clamp of x to the range given by the scalars
//! minVal and maxVal.
//! Both bounds are broadcast to all four lanes with _mm_set1_ps and
//! then passed, together with x.Data, to detail::sse_clp_ps.
inline detail::fvec4SIMD clamp(
    detail::fvec4SIMD const & x,
    float const & minVal,
    float const & maxVal)
{
    detail::fvec4SIMD const Result(
        detail::sse_clp_ps(x.Data, _mm_set1_ps(minVal), _mm_set1_ps(maxVal)));
    return Result;
}
|
||||
|
||||
//! Component-wise linear blend of x and y using the weights in a.
//!
//! Evaluated as x + a * (y - x), which is algebraically the same as
//! x * (1.0 - a) + y * a. The value of a is not restricted to [0, 1].
//!
//! \param[in] x Value returned where a is 0.
//! \param[in] y Value returned where a is 1.
//! \param[in] a Per-component blend weight.
inline detail::fvec4SIMD mix
(
    detail::fvec4SIMD const & x,
    detail::fvec4SIMD const & y,
    detail::fvec4SIMD const & a
)
{
    __m128 const Delta = _mm_sub_ps(y.Data, x.Data);
    __m128 const Scaled = _mm_mul_ps(a.Data, Delta);
    // The weighted delta is added to x; multiplying here would not
    // produce a linear blend.
    return _mm_add_ps(x.Data, Scaled);
}
|
||||
|
||||
//! Component-wise step function: 0.0 where x < edge, otherwise 1.0.
//!
//! \param[in] edge Per-component threshold.
//! \param[in] x    Values compared against the threshold.
inline detail::fvec4SIMD step
(
    detail::fvec4SIMD const & edge,
    detail::fvec4SIMD const & x
)
{
    // SSE comparisons yield a per-lane bit mask (all ones or all zeros),
    // not a numeric 0/1, so the mask is combined bitwise with 1.0 instead
    // of being passed through min/max.
    __m128 const Below = _mm_cmplt_ps(x.Data, edge.Data);
    // andnot computes (~Below) & one: 0.0 where x < edge, 1.0 elsewhere.
    return _mm_andnot_ps(Below, detail::one);
}
|
||||
|
||||
//! Component-wise step function against a scalar threshold:
//! 0.0 where x < edge, otherwise 1.0.
//!
//! \param[in] edge Threshold, broadcast to all four lanes.
//! \param[in] x    Values compared against the threshold.
inline detail::fvec4SIMD step
(
    float const & edge,
    detail::fvec4SIMD const & x
)
{
    // SSE comparisons yield a per-lane bit mask (all ones or all zeros),
    // not a numeric 0/1, so the mask is combined bitwise with 1.0 instead
    // of being passed through min/max.
    __m128 const Below = _mm_cmplt_ps(x.Data, _mm_set1_ps(edge));
    // andnot computes (~Below) & one: 0.0 where x < edge, 1.0 elsewhere.
    return _mm_andnot_ps(Below, detail::one);
}
|
||||
|
||||
//! Component-wise smoothstep of x between the vector edges
//! edge0 and edge1.
//! Forwards the three Data members to detail::sse_ssp_ps and wraps
//! the result.
inline detail::fvec4SIMD smoothstep(
    detail::fvec4SIMD const & edge0,
    detail::fvec4SIMD const & edge1,
    detail::fvec4SIMD const & x)
{
    detail::fvec4SIMD const Result(detail::sse_ssp_ps(edge0.Data, edge1.Data, x.Data));
    return Result;
}
|
||||
|
||||
//! Component-wise smoothstep of x between the scalar edges
//! edge0 and edge1.
//! Both edges are broadcast to all four lanes with _mm_set1_ps and
//! then passed, together with x.Data, to detail::sse_ssp_ps.
inline detail::fvec4SIMD smoothstep(
    float const & edge0,
    float const & edge1,
    detail::fvec4SIMD const & x)
{
    detail::fvec4SIMD const Result(
        detail::sse_ssp_ps(_mm_set1_ps(edge0), _mm_set1_ps(edge1), x.Data));
    return Result;
}
|
||||
|
||||
//inline bvec4 isnan(detail::fvec4SIMD const & x)
|
||||
//{
|
||||
|
||||
//}
|
||||
|
||||
//inline bvec4 isinf(detail::fvec4SIMD const & x)
|
||||
//{
|
||||
|
||||
//}
|
||||
|
||||
//inline detail::ivec4SIMD floatBitsToInt
|
||||
//(
|
||||
// detail::fvec4SIMD const & value
|
||||
//)
|
||||
//{
|
||||
|
||||
//}
|
||||
|
||||
//inline detail::fvec4SIMD intBitsToFloat
|
||||
//(
|
||||
// detail::ivec4SIMD const & value
|
||||
//)
|
||||
//{
|
||||
|
||||
//}
|
||||
|
||||
//! Component-wise a * b + c.
//! Computed as a separate _mm_mul_ps followed by _mm_add_ps.
inline detail::fvec4SIMD fma(
    detail::fvec4SIMD const & a,
    detail::fvec4SIMD const & b,
    detail::fvec4SIMD const & c)
{
    __m128 const Product = _mm_mul_ps(a.Data, b.Data);
    __m128 const Sum = _mm_add_ps(Product, c.Data);
    return Sum;
}
|
||||
|
||||
inline float simdLength
|
||||
(
|
||||
detail::fvec4SIMD const & x
|
||||
|
Loading…
Reference in New Issue
Block a user