mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-24 22:10:13 +00:00
a5721ebc68
For arguments with X^2 + Y^2 close to 1, clog and clog10 avoid large errors from log(hypot) by computing X^2 + Y^2 - 1 in a way that avoids cancellation error and then using log1p. However, the thresholds for using that approach still result in log being used on arguments as large as sqrt(13/16) > 0.9, leading to significant errors, in some cases above the 9ulp maximum allowed in glibc libm. This patch arranges for the approach using log1p to be used in any cases where |X|, |Y| < 1 and X^2 + Y^2 >= 0.5 (with the existing allowance for cases where one of X and Y is very small), adjusting the __x2y2m1 functions to work with the wider range of inputs. This way, log only gets used on arguments below sqrt(1/2) (or substantially above 1), where the error involved is much less. Tested for x86_64, x86, mips64 and powerpc. For the ulps regeneration I removed the existing clog and clog10 ulps before regenerating to allow any reduced ulps to appear. Tests added include those found by random test generation to produce large ulps either before or after the patch, and some found by trying inputs close to the (0.75, 0.5) threshold where the potential errors from using log are largest. [BZ #19016] * sysdeps/generic/math_private.h (__x2y2m1f): Update comment to allow more cases with X^2 + Y^2 >= 0.5. * sysdeps/ieee754/dbl-64/x2y2m1.c (__x2y2m1): Likewise. Add -1 as normal element in sum instead of special-casing based on values of arguments. * sysdeps/ieee754/dbl-64/x2y2m1f.c (__x2y2m1f): Update comment. * sysdeps/ieee754/ldbl-128/x2y2m1l.c (__x2y2m1l): Likewise. Add -1 as normal element in sum instead of special-casing based on values of arguments. * sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c (__x2y2m1l): Likewise. * sysdeps/ieee754/ldbl-96/x2y2m1.c [FLT_EVAL_METHOD != 0] (__x2y2m1): Update comment. * sysdeps/ieee754/ldbl-96/x2y2m1l.c (__x2y2m1l): Likewise. Add -1 as normal element in sum instead of special-casing based on values of arguments. 
* math/s_clog.c (__clog): Handle more cases using log1p without hypot. * math/s_clog10.c (__clog10): Likewise. * math/s_clog10f.c (__clog10f): Likewise. * math/s_clog10l.c (__clog10l): Likewise. * math/s_clogf.c (__clogf): Likewise. * math/s_clogl.c (__clogl): Likewise. * math/auto-libm-test-in: Add more tests of clog and clog10. * math/auto-libm-test-out: Regenerated. * sysdeps/i386/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
124 lines
3.7 KiB
C
124 lines
3.7 KiB
C
/* Compute x^2 + y^2 - 1, without large cancellation error.
   Copyright (C) 2012-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>. */
|
|
|
|
#include <math.h>
|
|
#include <math_private.h>
|
|
#include <float.h>
|
|
#include <stdlib.h>
|
|
|
|
/* Form the exact sum X + Y as an unevaluated pair: *HI receives the
   rounded sum and *LO the rounding error, so that *HI + *LO equals
   X + Y.  The caller must ensure |X| >= |Y| and that the values are
   small enough for the addition not to overflow.  */

static inline void
add_split (double *hi, double *lo, double x, double y)
{
  /* Dekker's algorithm: recover the part of Y that was lost when the
     sum was rounded.  */
  const double sum = x + y;
  const double err = (x - sum) + y;
  *hi = sum;
  *lo = err;
}
|
|
|
|
/* Form the exact product X * Y as an unevaluated pair: *HI receives
   the rounded product and *LO the rounding error, so that *HI + *LO
   equals X * Y.  The caller must ensure the values are small enough
   that nothing overflows and large enough (or zero) that nothing
   underflows.  */

static inline void
mul_split (double *hi, double *lo, double x, double y)
{
  /* Every variant starts from the rounded product; they differ only
     in how the rounding error is recovered.  */
  *hi = x * y;
#ifdef __FP_FAST_FMA
  /* Fast built-in fused multiply-add: X * Y - *HI in one rounding.  */
  *lo = __builtin_fma (x, y, -*hi);
#elif defined FP_FAST_FMA
  /* Fast library fused multiply-add, compiler before GCC 4.6.  */
  *lo = __fma (x, y, -*hi);
#else
  /* Dekker's algorithm: split each operand into a head and a tail
     using the multiplier 2^((DBL_MANT_DIG + 1) / 2) + 1, then
     accumulate the partial products against *HI, largest first.  */
  const double splitter = (1 << (DBL_MANT_DIG + 1) / 2) + 1;
  const double x_scaled = x * splitter;
  const double y_scaled = y * splitter;
  const double x_head = (x - x_scaled) + x_scaled;
  const double y_head = (y - y_scaled) + y_scaled;
  const double x_tail = x - x_head;
  const double y_tail = y - y_head;
  *lo = (((x_head * y_head - *hi) + x_head * y_tail) + x_tail * y_head)
	+ x_tail * y_tail;
#endif
}
|
|
|
|
/* qsort comparison callback: order the doubles pointed to by P and Q
   by increasing absolute value.  Returns -1, 0 or 1.  */

static int
compare (const void *p, const void *q)
{
  const double lhs = fabs (*(const double *) p);
  const double rhs = fabs (*(const double *) q);
  if (lhs == rhs)
    return 0;
  return lhs < rhs ? -1 : 1;
}
|
|
|
|
/* Return X^2 + Y^2 - 1, computed without large cancellation error.
|
|
It is given that 1 > X >= Y >= epsilon / 2, and that X^2 + Y^2 >=
|
|
0.5. */
|
|
|
|
long double
|
|
__x2y2m1l (long double x, long double y)
|
|
{
|
|
double vals[13];
|
|
SET_RESTORE_ROUND (FE_TONEAREST);
|
|
union ibm_extended_long_double xu, yu;
|
|
xu.ld = x;
|
|
yu.ld = y;
|
|
if (fabs (xu.d[1].d) < 0x1p-500)
|
|
xu.d[1].d = 0.0;
|
|
if (fabs (yu.d[1].d) < 0x1p-500)
|
|
yu.d[1].d = 0.0;
|
|
mul_split (&vals[1], &vals[0], xu.d[0].d, xu.d[0].d);
|
|
mul_split (&vals[3], &vals[2], xu.d[0].d, xu.d[1].d);
|
|
vals[2] *= 2.0;
|
|
vals[3] *= 2.0;
|
|
mul_split (&vals[5], &vals[4], xu.d[1].d, xu.d[1].d);
|
|
mul_split (&vals[7], &vals[6], yu.d[0].d, yu.d[0].d);
|
|
mul_split (&vals[9], &vals[8], yu.d[0].d, yu.d[1].d);
|
|
vals[8] *= 2.0;
|
|
vals[9] *= 2.0;
|
|
mul_split (&vals[11], &vals[10], yu.d[1].d, yu.d[1].d);
|
|
vals[12] = -1.0;
|
|
qsort (vals, 13, sizeof (double), compare);
|
|
/* Add up the values so that each element of VALS has absolute value
|
|
at most equal to the last set bit of the next nonzero
|
|
element. */
|
|
for (size_t i = 0; i <= 11; i++)
|
|
{
|
|
add_split (&vals[i + 1], &vals[i], vals[i + 1], vals[i]);
|
|
qsort (vals + i + 1, 12 - i, sizeof (double), compare);
|
|
}
|
|
/* Now any error from this addition will be small. */
|
|
long double retval = (long double) vals[12];
|
|
for (size_t i = 11; i != (size_t) -1; i--)
|
|
retval += (long double) vals[i];
|
|
return retval;
|
|
}
|