1
0
mirror of https://github.com/microsoft/DirectXMath synced 2024-11-21 20:00:12 +00:00

Implement SVML Intrinsics paths for VS2019 builds (#108)

This commit is contained in:
Chuck Walbourn 2020-05-04 23:50:02 -07:00
parent cdb6d2c44b
commit 103b33d248
2 changed files with 78 additions and 0 deletions

View File

@ -89,6 +89,10 @@
#endif
#endif // !_XM_ARM_NEON_INTRINSICS_ && !_XM_SSE_INTRINSICS_ && !_XM_NO_INTRINSICS_
// Automatically select the SVML intrinsic code paths.
//   - Skipped when _XM_SVML_INTRINSICS_ is already defined or the user has
//     defined _XM_DISABLE_INTEL_SVML_ to opt out.
//   - Requires the SSE intrinsics path and a non-clang compiler.
//   - Requires _MSC_VER >= 1920 (Visual Studio 2019 or later).
#if !defined(_XM_SVML_INTRINSICS_) && !defined(_XM_DISABLE_INTEL_SVML_)
#if defined(_XM_SSE_INTRINSICS_) && !defined(__clang__)
#if defined(_MSC_VER) && (_MSC_VER >= 1920)
#define _XM_SVML_INTRINSICS_
#endif
#endif
#endif
// When building with clang or GCC, define _XM_NO_XMVECTOR_OVERLOADS_
// unless the user has already defined it.
#if defined(__clang__) || defined(__GNUC__)
#ifndef _XM_NO_XMVECTOR_OVERLOADS_
#define _XM_NO_XMVECTOR_OVERLOADS_
#endif
#endif

View File

@ -3296,6 +3296,9 @@ inline XMVECTOR XM_CALLCONV XMVectorExp2(FXMVECTOR V) noexcept
float32x4_t vResult = vbslq_f32(isNaN, g_XMQNaN, result5);
return vResult;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_exp2_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
__m128i itrunc = _mm_cvttps_epi32(V);
__m128 ftrunc = _mm_cvtepi32_ps(itrunc);
@ -3378,6 +3381,9 @@ inline XMVECTOR XM_CALLCONV XMVectorExp10(FXMVECTOR V) noexcept
} } };
return Result.v;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_exp10_ps(V);
return Result;
#else
// exp10(V) = exp2(vin*log2(10))
XMVECTOR Vten = XMVectorMultiply(g_XMLg10, V);
@ -3399,6 +3405,9 @@ inline XMVECTOR XM_CALLCONV XMVectorExpE(FXMVECTOR V) noexcept
} } };
return Result.v;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_exp_ps(V);
return Result;
#else
// expE(V) = exp2(vin*log2(e))
XMVECTOR Ve = XMVectorMultiply(g_XMLgE, V);
@ -3645,6 +3654,9 @@ inline XMVECTOR XM_CALLCONV XMVectorLog2(FXMVECTOR V) noexcept
result = vbslq_f32(isPositive, result, tmp);
result = vbslq_f32(isNaN, g_XMQNaN, result);
return result;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_log2_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
__m128i rawBiased = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity);
__m128i trailing = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest);
@ -3804,6 +3816,9 @@ inline XMVECTOR XM_CALLCONV XMVectorLog10(FXMVECTOR V) noexcept
result = vbslq_f32(isPositive, result, tmp);
result = vbslq_f32(isNaN, g_XMQNaN, result);
return result;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_log10_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
__m128i rawBiased = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity);
__m128i trailing = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest);
@ -3965,6 +3980,9 @@ inline XMVECTOR XM_CALLCONV XMVectorLogE(FXMVECTOR V) noexcept
result = vbslq_f32(isPositive, result, tmp);
result = vbslq_f32(isNaN, g_XMQNaN, result);
return result;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_log_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
__m128i rawBiased = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity);
__m128i trailing = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest);
@ -4082,6 +4100,9 @@ inline XMVECTOR XM_CALLCONV XMVectorPow
powf(vgetq_lane_f32(V1, 3), vgetq_lane_f32(V2, 3))
} } };
return vResult.v;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_pow_ps(V1, V2);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
XM_ALIGNED_DATA(16) float a[4];
XM_ALIGNED_DATA(16) float b[4];
@ -4222,6 +4243,9 @@ inline XMVECTOR XM_CALLCONV XMVectorSin(FXMVECTOR V) noexcept
Result = vmlaq_f32(g_XMOne, Result, x2);
Result = vmulq_f32(Result, x);
return Result;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_sin_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
// Force the value within the bounds of pi
XMVECTOR x = XMVectorModAngles(V);
@ -4307,6 +4331,9 @@ inline XMVECTOR XM_CALLCONV XMVectorCos(FXMVECTOR V) noexcept
Result = vmlaq_f32(g_XMOne, Result, x2);
Result = vmulq_f32(Result, sign);
return Result;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_cos_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
// Map V to x in [-pi,pi].
XMVECTOR x = XMVectorModAngles(V);
@ -4430,6 +4457,8 @@ inline void XM_CALLCONV XMVectorSinCos
Result = vmlaq_f32(g_XMOne, Result, x2);
*pCos = vmulq_f32(Result, sign);
#elif defined(_XM_SVML_INTRINSICS_)
*pSin = _mm_sincos_ps(pCos, V);
#elif defined(_XM_SSE_INTRINSICS_)
// Force the value within the bounds of pi
XMVECTOR x = XMVectorModAngles(V);
@ -4505,6 +4534,9 @@ inline XMVECTOR XM_CALLCONV XMVectorTan(FXMVECTOR V) noexcept
tanf(V.vector4_f32[3])
} } };
return Result.v;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_tan_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
static const XMVECTORF32 TanCoefficients0 = { { { 1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f } } };
@ -4604,6 +4636,9 @@ inline XMVECTOR XM_CALLCONV XMVectorSinH(FXMVECTOR V) noexcept
XMVECTOR E2 = XMVectorExp(V2);
return vsubq_f32(E1, E2);
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_sinh_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORF32 Scale = { { { 1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f } } }; // 1.0f / ln(2.0f)
@ -4636,6 +4671,9 @@ inline XMVECTOR XM_CALLCONV XMVectorCosH(FXMVECTOR V) noexcept
XMVECTOR E1 = XMVectorExp(V1);
XMVECTOR E2 = XMVectorExp(V2);
return vaddq_f32(E1, E2);
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_cosh_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORF32 Scale = { { { 1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f } } }; // 1.0f / ln(2.0f)
@ -4667,6 +4705,9 @@ inline XMVECTOR XM_CALLCONV XMVectorTanH(FXMVECTOR V) noexcept
E = vmlaq_f32(g_XMOneHalf.v, E, g_XMOneHalf.v);
E = XMVectorReciprocal(E);
return vsubq_f32(g_XMOne.v, E);
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_tanh_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORF32 Scale = { { { 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f } } }; // 2.0f / ln(2.0f)
@ -4730,6 +4771,9 @@ inline XMVECTOR XM_CALLCONV XMVectorASin(FXMVECTOR V) noexcept
t0 = vbslq_f32(nonnegative, t0, t1);
t0 = vsubq_f32(g_XMHalfPi, t0);
return t0;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_asin_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
__m128 nonnegative = _mm_cmpge_ps(V, g_XMZero);
__m128 mvalue = _mm_sub_ps(g_XMZero, V);
@ -4826,6 +4870,9 @@ inline XMVECTOR XM_CALLCONV XMVectorACos(FXMVECTOR V) noexcept
float32x4_t t1 = vsubq_f32(g_XMPi, t0);
t0 = vbslq_f32(nonnegative, t0, t1);
return t0;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_acos_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
__m128 nonnegative = _mm_cmpge_ps(V, g_XMZero);
__m128 mvalue = _mm_sub_ps(g_XMZero, V);
@ -4928,6 +4975,9 @@ inline XMVECTOR XM_CALLCONV XMVectorATan(FXMVECTOR V) noexcept
comp = vceqq_f32(sign, g_XMZero);
Result = vbslq_f32(comp, Result, result1);
return Result;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_atan_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
__m128 absV = XMVectorAbs(V);
__m128 invV = _mm_div_ps(g_XMOne, V);
@ -5000,6 +5050,9 @@ inline XMVECTOR XM_CALLCONV XMVectorATan2
atan2f(Y.vector4_f32[3], X.vector4_f32[3])
} } };
return Result.v;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_atan2_ps(Y, X);
return Result;
#else
// Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions:
@ -5096,6 +5149,9 @@ inline XMVECTOR XM_CALLCONV XMVectorSinEst(FXMVECTOR V) noexcept
Result = vmlaq_f32(g_XMOne, Result, x2);
Result = vmulq_f32(Result, x);
return Result;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_sin_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
// Force the value within the bounds of pi
XMVECTOR x = XMVectorModAngles(V);
@ -5166,6 +5222,9 @@ inline XMVECTOR XM_CALLCONV XMVectorCosEst(FXMVECTOR V) noexcept
Result = vmlaq_f32(g_XMOne, Result, x2);
Result = vmulq_f32(Result, sign);
return Result;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_cos_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
// Map V to x in [-pi,pi].
XMVECTOR x = XMVectorModAngles(V);
@ -5324,6 +5383,9 @@ inline XMVECTOR XM_CALLCONV XMVectorTanEst(FXMVECTOR V) noexcept
tanf(V.vector4_f32[3])
} } };
return Result.v;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_tan_ps(V);
return Result;
#else
XMVECTOR OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients.v);
@ -5388,6 +5450,9 @@ inline XMVECTOR XM_CALLCONV XMVectorASinEst(FXMVECTOR V) noexcept
t0 = vbslq_f32(nonnegative, t0, t1);
t0 = vsubq_f32(g_XMHalfPi, t0);
return t0;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_asin_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
__m128 nonnegative = _mm_cmpge_ps(V, g_XMZero);
__m128 mvalue = _mm_sub_ps(g_XMZero, V);
@ -5458,6 +5523,9 @@ inline XMVECTOR XM_CALLCONV XMVectorACosEst(FXMVECTOR V) noexcept
float32x4_t t1 = vsubq_f32(g_XMPi, t0);
t0 = vbslq_f32(nonnegative, t0, t1);
return t0;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_acos_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
__m128 nonnegative = _mm_cmpge_ps(V, g_XMZero);
__m128 mvalue = _mm_sub_ps(g_XMZero, V);
@ -5535,6 +5603,9 @@ inline XMVECTOR XM_CALLCONV XMVectorATanEst(FXMVECTOR V) noexcept
comp = vceqq_f32(sign, g_XMZero);
Result = vbslq_f32(comp, Result, result1);
return Result;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_atan_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
__m128 absV = XMVectorAbs(V);
__m128 invV = _mm_div_ps(g_XMOne, V);
@ -5593,6 +5664,9 @@ inline XMVECTOR XM_CALLCONV XMVectorATan2Est
atan2f(Y.vector4_f32[3], X.vector4_f32[3]),
} } };
return Result.v;
#elif defined(_XM_SVML_INTRINSICS_)
XMVECTOR Result = _mm_atan2_ps(Y, X);
return Result;
#else
static const XMVECTORF32 ATan2Constants = { { { XM_PI, XM_PIDIV2, XM_PIDIV4, 2.3561944905f /* Pi*3/4 */ } } };