1
0
mirror of https://github.com/microsoft/DirectXMath synced 2024-09-19 14:49:54 +00:00

Fixed float denorm conversion handling for XMConvertFloatToHalf (#114)

This commit is contained in:
Chuck Walbourn 2020-06-25 15:08:32 -07:00 committed by GitHub
parent 196104d0eb
commit d0bbddc9f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 27 additions and 36 deletions

View File

@ -71,7 +71,7 @@ namespace DirectX
//------------------------------------------------------------------------------
// 16 bit floating point number consisting of a sign bit, a 5 bit biased
// exponent, and a 10 bit mantissa
typedef uint16_t HALF;
using HALF = uint16_t;
//------------------------------------------------------------------------------
// 2D Vector; 16 bit floating point components

View File

@ -387,8 +387,8 @@ inline HALF XMConvertFloatToHalf(float Value) noexcept
{
#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
__m128 V1 = _mm_set_ss(Value);
__m128i V2 = _mm_cvtps_ph(V1, 0);
return static_cast<HALF>(_mm_cvtsi128_si32(V2));
__m128i V2 = _mm_cvtps_ph(V1, _MM_FROUND_TO_NEAREST_INT);
return static_cast<HALF>(_mm_extract_epi16(V2, 0));
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__) && !defined(_XM_NO_INTRINSICS_)
float32x4_t vFloat = vdupq_n_f32(Value);
float16x4_t vHalf = vcvt_f16_f32(vFloat);
@ -399,38 +399,29 @@ inline HALF XMConvertFloatToHalf(float Value) noexcept
auto IValue = reinterpret_cast<uint32_t*>(&Value)[0];
uint32_t Sign = (IValue & 0x80000000U) >> 16U;
IValue = IValue & 0x7FFFFFFFU; // Hack off the sign
if (IValue > 0x477FE000U)
if (IValue >= 0x47800000 /*e+16*/)
{
// The number is too large to be represented as a half. Saturate to infinity.
if (((IValue & 0x7F800000) == 0x7F800000) && ((IValue & 0x7FFFFF) != 0))
{
Result = 0x7FFF; // NAN
}
else
{
Result = 0x7C00U; // INF
}
// The number is too large to be represented as a half, or is a NaN. Return infinity, or a NaN with bit 0x200 set that keeps the top 10 mantissa bits.
Result = 0x7C00U | ((IValue > 0x7F800000) ? (0x200 | ((IValue >> 13U) & 0x3FFU)) : 0U);
}
else if (!IValue)
else if (IValue <= 0x33000000U /*e-25*/)
{
Result = 0;
}
else if (IValue < 0x38800000U /*e-14*/)
{
// The number is too small to be represented as a normalized half.
// Convert it to a denormalized value.
uint32_t Shift = 125U - (IValue >> 23U);
IValue = 0x800000U | (IValue & 0x7FFFFFU);
Result = IValue >> (Shift + 1);
uint32_t s = (IValue & ((1U << Shift) - 1)) != 0;
Result += (Result | s) & ((IValue >> Shift) & 1U);
}
else
{
if (IValue < 0x38800000U)
{
// The number is too small to be represented as a normalized half.
// Convert it to a denormalized value.
uint32_t Shift = 113U - (IValue >> 23U);
IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift;
}
else
{
// Rebias the exponent to represent the value as a normalized half.
IValue += 0xC8000000U;
}
// Rebias the exponent to represent the value as a normalized half.
IValue += 0xC8000000U;
Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U) & 0x7FFFU;
}
return static_cast<HALF>(Result | Sign);
@ -477,7 +468,7 @@ inline HALF* XMConvertFloatToHalfStream
__m128 FV = _mm_load_ps(reinterpret_cast<const float*>(pFloat));
pFloat += InputStride * 4;
__m128i HV = _mm_cvtps_ph(FV, 0);
__m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT);
_mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV);
pHalf += OutputStride * 4;
@ -492,7 +483,7 @@ inline HALF* XMConvertFloatToHalfStream
__m128 FV = _mm_loadu_ps(reinterpret_cast<const float*>(pFloat));
pFloat += InputStride * 4;
__m128i HV = _mm_cvtps_ph(FV, 0);
__m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT);
_mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV);
pHalf += OutputStride * 4;
@ -510,7 +501,7 @@ inline HALF* XMConvertFloatToHalfStream
__m128 FV = _mm_load_ps(reinterpret_cast<const float*>(pFloat));
pFloat += InputStride * 4;
__m128i HV = _mm_cvtps_ph(FV, 0);
__m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT);
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 0));
pHalf += OutputStride;
@ -531,7 +522,7 @@ inline HALF* XMConvertFloatToHalfStream
__m128 FV = _mm_loadu_ps(reinterpret_cast<const float*>(pFloat));
pFloat += InputStride * 4;
__m128i HV = _mm_cvtps_ph(FV, 0);
__m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT);
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 0));
pHalf += OutputStride;
@ -567,7 +558,7 @@ inline HALF* XMConvertFloatToHalfStream
__m128 FT = _mm_blend_ps(FV3, FV4, 0x8);
FV = _mm_blend_ps(FV, FT, 0xC);
__m128i HV = _mm_cvtps_ph(FV, 0);
__m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT);
_mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV);
pHalf += OutputStride * 4;
@ -595,7 +586,7 @@ inline HALF* XMConvertFloatToHalfStream
__m128 FT = _mm_blend_ps(FV3, FV4, 0x8);
FV = _mm_blend_ps(FV, FT, 0xC);
__m128i HV = _mm_cvtps_ph(FV, 0);
__m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT);
*reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 0));
pHalf += OutputStride;
@ -2099,7 +2090,7 @@ inline void XM_CALLCONV XMStoreHalf2
{
assert(pDestination);
#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
__m128i V1 = _mm_cvtps_ph(V, 0);
__m128i V1 = _mm_cvtps_ph(V, _MM_FROUND_TO_NEAREST_INT);
_mm_store_ss(reinterpret_cast<float*>(pDestination), _mm_castsi128_ps(V1));
#else
pDestination->x = XMConvertFloatToHalf(XMVectorGetX(V));
@ -2655,7 +2646,7 @@ inline void XM_CALLCONV XMStoreHalf4
{
assert(pDestination);
#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
__m128i V1 = _mm_cvtps_ph(V, 0);
__m128i V1 = _mm_cvtps_ph(V, _MM_FROUND_TO_NEAREST_INT);
_mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), V1);
#else
XMFLOAT4A t;