Fix fma missing underflows and bad results for some subnormal results (bugs 14152, 14783).

This commit is contained in:
Joseph Myers 2012-10-30 13:54:50 +00:00
parent 2a27fd6dae
commit 8627a2329c
6 changed files with 105 additions and 53 deletions

View File

@ -1,5 +1,18 @@
2012-10-30 Joseph Myers <joseph@codesourcery.com>
[BZ #14152]
[BZ #14783]
* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Extract low bits of
result and shift together with sticky bit instead of replicating
round-to-nearest rounding.
* sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise.
* sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise.
* math/libm-test.inc (fma_test): Add more tests. Do not permit
missing underflow exceptions.
(fma_test_towardzero): Add more tests.
(fma_test_downward): Likewise.
(fma_test_upward): Likewise.
[BZ #14047]
* sysdeps/generic/tininess.h: New file.
* sysdeps/i386/tininess.h: Likewise.

14
NEWS
View File

@ -11,13 +11,13 @@ Version 2.17
1349, 3479, 5044, 5298, 5400, 6530, 6778, 6808, 9685, 9914, 10014, 10038,
10631, 11438, 11607, 12140, 13412, 13542, 13601, 13629, 13679, 13696,
13717, 13741, 13939, 13966, 14042, 14047, 14090, 14150, 14151, 14154,
14157, 14166, 14173, 14195, 14237, 14251, 14252, 14283, 14298, 14303,
14307, 14328, 14331, 14336, 14337, 14347, 14349, 14376, 14417, 14459,
14476, 14477, 14505, 14510, 14516, 14518, 14519, 14530, 14532, 14538,
14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583, 14587,
14602, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14683, 14694,
14716, 14743, 14767.
13717, 13741, 13939, 13966, 14042, 14047, 14090, 14150, 14151, 14152,
14154, 14157, 14166, 14173, 14195, 14237, 14251, 14252, 14283, 14298,
14303, 14307, 14328, 14331, 14336, 14337, 14347, 14349, 14376, 14417,
14459, 14476, 14477, 14505, 14510, 14516, 14518, 14519, 14530, 14532,
14538, 14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583,
14587, 14602, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14683,
14694, 14716, 14743, 14767, 14783.
* Support for STT_GNU_IFUNC symbols added for s390 and s390x.
Optimized versions of memcpy, memset, and memcmp added for System z10 and

View File

@ -4616,6 +4616,8 @@ fma_test (void)
TEST_fff_f (fma, 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115);
TEST_fff_f (fma, -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127);
TEST_fff_f (fma, 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127);
TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13);
@ -4635,10 +4637,11 @@ fma_test (void)
TEST_fff_f (fma, 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022, UNDERFLOW_EXCEPTION);
/* Sometimes the FE_UNDERFLOW is not set, so be prepared. See Bug 14152. */
TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022, UNDERFLOW_EXCEPTION_OK);
TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.fffffffffffffp-711, 0x1.fffffffffffffp-275, 0x1.fffffe00007ffp-983, 0x1.7ffffe00007ffp-983);
TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, -0x8.03fcp+3696L, 0xf.fffffffffffffffp-6140L, 0x8.3ffffffffffffffp-2450L, -0x8.01ecp-2440L);
@ -4646,8 +4649,9 @@ fma_test (void)
TEST_fff_f (fma, 0xc.7fc000003ffffffp-1194L, 0x8.1e0003fffffffffp+15327L, -0x8.fffep+14072L, 0xc.ae9f164020effffp+14136L);
TEST_fff_f (fma, -0x8.0001fc000000003p+1798L, 0xcp-2230L, 0x8.f7e000000000007p-468L, -0xc.0002f9ffee10404p-429L);
TEST_fff_f (fma, 0xc.0000000000007ffp+10130L, -0x8.000000000000001p+4430L, 0xc.07000000001ffffp+14513L, -0xb.fffffffffffd7e4p+14563L);
/* Bug 14152: underflow exception may be missing. */
TEST_fff_f (fma, 0xb.ffffp-4777L, 0x8.000000fffffffffp-11612L, -0x0.3800fff8p-16385L, 0x5.c7fe80c7ffeffffp-16385L, UNDERFLOW_EXCEPTION_OK);
TEST_fff_f (fma, 0xb.ffffp-4777L, 0x8.000000fffffffffp-11612L, -0x0.3800fff8p-16385L, 0x5.c7fe80c7ffeffffp-16385L, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.bb2de33e02ccbbfa6e245a7c1f71p-2584L, -0x1.6b500daf0580d987f1bc0cadfcddp-13777L, 0x1.613cd91d9fed34b33820e5ab9d8dp-16378L, -0x1.3a79fb50eb9ce887cffa0f09bd9fp-16360L);
@ -4663,6 +4667,8 @@ fma_test (void)
TEST_fff_f (fma, 0x1.00000000000007ffffffffffffffp-9045L, -0x1.ffffffffffff80000001ffffffffp+4773L, -0x1.f8p-4316L, -0x1.00000000000f88000000fffffdffp-4271L);
TEST_fff_f (fma, 0x1.4e922764c90701d4a2f21d01893dp-8683L, -0x1.955a12e2d7c9447c27fa022fc865p+212L, -0x1.e9634462eaef96528b90b6944578p-8521L, -0x1.08e1783184a371943d3598e10865p-8470L);
TEST_fff_f (fma, 0x1.801181509c03bdbef10d6165588cp-15131L, 0x1.ad86f8e57d3d40bfa8007780af63p-368L, -0x1.6e9df0dab1c9f1d7a6043c390741p-15507L, 0x1.417c9b2b15e2ad57dc9e0e920844p-15498L);
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
#endif
END (fma);
@ -4717,6 +4723,23 @@ fma_test_towardzero (void)
TEST_fff_f (fma, -min_value, min_value, minus_zero, minus_zero, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION);
#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
#endif
}
fesetround (save_round_mode);
@ -4773,6 +4796,23 @@ fma_test_downward (void)
TEST_fff_f (fma, -min_value, min_value, minus_zero, -min_subnorm_value, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION);
#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00008p-127, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION);
#endif
}
fesetround (save_round_mode);
@ -4829,6 +4869,23 @@ fma_test_upward (void)
TEST_fff_f (fma, -min_value, min_value, minus_zero, minus_zero, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -min_value, -min_value, plus_zero, min_subnorm_value, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -min_value, -min_value, minus_zero, min_subnorm_value, UNDERFLOW_EXCEPTION);
#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00008p-127, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
#endif
}
fesetround (save_round_mode);

View File

@ -210,20 +210,14 @@ __fma (double x, double y, double z)
{
/* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
v.ieee.mantissa1 & 1 is the round bit and j is our sticky
bit. In round-to-nearest 001 rounds down like 00,
011 rounds up, even though 01 rounds down (thus we need
to adjust), 101 rounds down like 10 and 111 rounds up
like 11. */
if ((v.ieee.mantissa1 & 3) == 1)
{
v.d *= 0x1p-106;
if (v.ieee.negative)
return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
else
return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
}
else
return v.d * 0x1p-106;
bit. */
w.d = 0.0;
w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
w.ieee.negative = v.ieee.negative;
v.ieee.mantissa1 &= ~3U;
v.d *= 0x1p-106;
w.d *= 0x1p-2;
return v.d + w.d;
}
v.ieee.mantissa1 |= j;
return v.d * 0x1p-106;

View File

@ -211,20 +211,14 @@ __fmal (long double x, long double y, long double z)
{
/* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
v.ieee.mantissa3 & 1 is the round bit and j is our sticky
bit. In round-to-nearest 001 rounds down like 00,
011 rounds up, even though 01 rounds down (thus we need
to adjust), 101 rounds down like 10 and 111 rounds up
like 11. */
if ((v.ieee.mantissa3 & 3) == 1)
{
v.d *= 0x1p-226L;
if (v.ieee.negative)
return v.d - 0x1p-16494L /* __LDBL_DENORM_MIN__ */;
else
return v.d + 0x1p-16494L /* __LDBL_DENORM_MIN__ */;
}
else
return v.d * 0x1p-226L;
bit. */
w.d = 0.0L;
w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
w.ieee.negative = v.ieee.negative;
v.ieee.mantissa3 &= ~3U;
v.d *= 0x1p-226L;
w.d *= 0x1p-2L;
return v.d + w.d;
}
v.ieee.mantissa3 |= j;
return v.d * 0x1p-226L;

View File

@ -211,20 +211,14 @@ __fmal (long double x, long double y, long double z)
{
/* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
v.ieee.mantissa1 & 1 is the round bit and j is our sticky
bit. In round-to-nearest 001 rounds down like 00,
011 rounds up, even though 01 rounds down (thus we need
to adjust), 101 rounds down like 10 and 111 rounds up
like 11. */
if ((v.ieee.mantissa1 & 3) == 1)
{
v.d *= 0x1p-128L;
if (v.ieee.negative)
return v.d - 0x1p-16445L /* __LDBL_DENORM_MIN__ */;
else
return v.d + 0x1p-16445L /* __LDBL_DENORM_MIN__ */;
}
else
return v.d * 0x1p-128L;
bit. */
w.d = 0.0L;
w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
w.ieee.negative = v.ieee.negative;
v.ieee.mantissa1 &= ~3U;
v.d *= 0x1p-128L;
w.d *= 0x1p-2L;
return v.d + w.d;
}
v.ieee.mantissa1 |= j;
return v.d * 0x1p-128L;