mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-26 15:00:06 +00:00
bc3753638a
On powerpc32 hard-float, older processors (ones where fcfid is not available for 32-bit code), GCC generates conversions from integers to floating point that wrongly convert integer 0 to -0 instead of +0 in FE_DOWNWARD mode. This in turn results in logb and a few other functions wrongly returning -0 when they should return +0. This patch works around this issue in glibc as I proposed in <https://sourceware.org/ml/libc-alpha/2015-09/msg00728.html>, so that the affected functions can be correct and the affected tests pass in the absence of a GCC fix for this longstanding issue (GCC bug 67771 - if fixed, of course we can put in GCC version conditionals, and eventually phase out the workarounds). A new macro FIX_INT_FP_CONVERT_ZERO is added in a new sysdeps header fix-int-fp-convert-zero.h, and the powerpc32/fpu version of that header defines the macro based on the results of a configure test for whether such conversions use the fcfid instruction. Tested for x86_64 (that installed stripped shared libraries are unchanged by the patch) and powerpc (that HAVE_PPC_FCFID comes out to 0 as expected and that the relevant tests are fixed). Also tested a build with GCC configured for -mcpu=power4 and verified that HAVE_PPC_FCFID comes out to 1 in that case. There are still some other issues to fix to get test-float and test-double passing cleanly for older powerpc32 processors (apart from the need for an ulps regeneration for powerpc). (test-ldouble will be harder to get passing cleanly, but with a combination of selected fixes to ldbl-128ibm code that don't involve significant performance issues, allowing spurious underflow and inexact exceptions for that format, and lots of XFAILing for the default case of unpatched libgcc, it should be doable.) [BZ #887] [BZ #19049] [BZ #19050] * sysdeps/generic/fix-int-fp-convert-zero.h: New file. * sysdeps/ieee754/dbl-64/e_log10.c: Include <fix-int-fp-convert-zero.h>. (__ieee754_log10): Adjust signs as needed if FIX_INT_FP_CONVERT_ZERO. 
* sysdeps/ieee754/dbl-64/e_log2.c: Include <fix-int-fp-convert-zero.h>. (__ieee754_log2): Adjust signs as needed if FIX_INT_FP_CONVERT_ZERO. * sysdeps/ieee754/dbl-64/s_erf.c: Include <fix-int-fp-convert-zero.h>. (__erfc): Adjust signs as needed if FIX_INT_FP_CONVERT_ZERO. * sysdeps/ieee754/dbl-64/s_logb.c: Include <fix-int-fp-convert-zero.h>. (__logb): Adjust signs as needed if FIX_INT_FP_CONVERT_ZERO. * sysdeps/ieee754/flt-32/e_log10f.c: Include <fix-int-fp-convert-zero.h>. (__ieee754_log10f): Adjust signs as needed if FIX_INT_FP_CONVERT_ZERO. * sysdeps/ieee754/flt-32/e_log2f.c: Include <fix-int-fp-convert-zero.h>. (__ieee754_log2f): Adjust signs as needed if FIX_INT_FP_CONVERT_ZERO. * sysdeps/ieee754/flt-32/s_erff.c: Include <fix-int-fp-convert-zero.h>. (__erfcf): Adjust signs as needed if FIX_INT_FP_CONVERT_ZERO. * sysdeps/ieee754/flt-32/s_logbf.c: Include <fix-int-fp-convert-zero.h>. (__logbf): Adjust signs as needed if FIX_INT_FP_CONVERT_ZERO. * sysdeps/ieee754/ldbl-128ibm/s_erfl.c: Include <fix-int-fp-convert-zero.h>. (__erfcl): Adjust signs as needed if FIX_INT_FP_CONVERT_ZERO. * sysdeps/ieee754/ldbl-128ibm/s_logbl.c: Include <fix-int-fp-convert-zero.h>. (__logbl): Adjust signs as needed if FIX_INT_FP_CONVERT_ZERO. * sysdeps/powerpc/powerpc32/fpu/configure.ac: New file. * sysdeps/powerpc/powerpc32/fpu/configure: New generated file. * sysdeps/powerpc/powerpc32/fpu/fix-int-fp-convert-zero.h: New file. * config.h.in [_LIBC] (HAVE_PPC_FCFID): New macro.
134 lines
4.3 KiB
C
134 lines
4.3 KiB
C
/* Adapted for log2 by Ulrich Drepper <drepper@cygnus.com>. */
|
|
/*
|
|
* ====================================================
|
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
*
|
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
* Permission to use, copy, modify, and distribute this
|
|
* software is freely granted, provided that this notice
|
|
* is preserved.
|
|
* ====================================================
|
|
*/
|
|
|
|
/* __ieee754_log2(x)
|
|
* Return the logarithm to base 2 of x
|
|
*
|
|
* Method :
|
|
* 1. Argument Reduction: find k and f such that
|
|
* x = 2^k * (1+f),
|
|
* where sqrt(2)/2 < 1+f < sqrt(2) .
|
|
*
|
|
* 2. Approximation of log(1+f).
|
|
* Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
|
|
* = 2s + 2/3 s**3 + 2/5 s**5 + .....,
|
|
* = 2s + s*R
|
|
* We use a special Remez algorithm on [0,0.1716] to generate
|
|
* a polynomial of degree 14 to approximate R.  The maximum error
|
|
* of this polynomial approximation is bounded by 2**-58.45. In
|
|
* other words,
|
|
* R(z) ~ Lg1*s**2 + Lg2*s**4 + Lg3*s**6 + Lg4*s**8 + Lg5*s**10 + Lg6*s**12 + Lg7*s**14
|
|
* (the values of Lg1 to Lg7 are listed in the program)
|
|
* and
|
|
* | Lg1*s**2 + ... + Lg7*s**14 - R(z) | <= 2**-58.45
|
|
* Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
|
|
* In order to guarantee error in log below 1ulp, we compute log
|
|
* by
|
|
* log(1+f) = f - s*(f - R) (if f is not too large)
|
|
* log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
|
|
*
|
|
* 3. Finally, log2(x) = k + log(1+f)/ln2
* = k + (f-(hfsq-(s*(hfsq+R))))/ln2
|
|
*
|
|
* Special cases:
|
|
* log2(x) is NaN with signal if x < 0 (including -INF) ;
|
|
* log2(+INF) is +INF; log2(+-0) is -INF with signal;
|
|
* log2(NaN) is that NaN with no signal.
|
|
*
|
|
* Constants:
|
|
* The hexadecimal values are the intended ones for the following
|
|
* constants. The decimal values may be used, provided that the
|
|
* compiler will convert from decimal to binary accurately enough
|
|
* to produce the hexadecimal values shown.
|
|
*/
|
|
|
|
#include <math.h>
|
|
#include <math_private.h>
|
|
#include <fix-int-fp-convert-zero.h>
|
|
|
|
/* Constants used by __ieee754_log2.  Where a hexadecimal pair is given
   in the trailing comment, it is the intended bit pattern (high word,
   low word) of the double; the decimal literal is expected to convert
   to exactly that value.  */
static const double
  ln2 = 0.69314718055994530942,         /* natural logarithm of 2 */
  two54 = 1.80143985094819840000e+16,   /* 43500000 00000000, i.e. 2**54 */
  /* Coefficients Lg1..Lg7 of the polynomial approximating R.  */
  Lg1 = 6.666666666666735130e-01,       /* 3FE55555 55555593 */
  Lg2 = 3.999999999940941908e-01,       /* 3FD99999 9997FA04 */
  Lg3 = 2.857142874366239149e-01,       /* 3FD24924 94229359 */
  Lg4 = 2.222219843214978396e-01,       /* 3FCC71C5 1D8E78AF */
  Lg5 = 1.818357216161805012e-01,       /* 3FC74664 96CB03DE */
  Lg6 = 1.531383769920937332e-01,       /* 3FC39A09 D078C69F */
  Lg7 = 1.479819860511658591e-01,       /* 3FC2F112 DF3E5244 */
  zero = 0.0;
|
|
|
|
double
|
|
__ieee754_log2 (double x)
|
|
{
|
|
double hfsq, f, s, z, R, w, t1, t2, dk;
|
|
int32_t k, hx, i, j;
|
|
u_int32_t lx;
|
|
|
|
EXTRACT_WORDS (hx, lx, x);
|
|
|
|
k = 0;
|
|
if (hx < 0x00100000)
|
|
{ /* x < 2**-1022 */
|
|
if (__glibc_unlikely (((hx & 0x7fffffff) | lx) == 0))
|
|
return -two54 / (x - x); /* log(+-0)=-inf */
|
|
if (__glibc_unlikely (hx < 0))
|
|
return (x - x) / (x - x); /* log(-#) = NaN */
|
|
k -= 54;
|
|
x *= two54; /* subnormal number, scale up x */
|
|
GET_HIGH_WORD (hx, x);
|
|
}
|
|
if (__glibc_unlikely (hx >= 0x7ff00000))
|
|
return x + x;
|
|
k += (hx >> 20) - 1023;
|
|
hx &= 0x000fffff;
|
|
i = (hx + 0x95f64) & 0x100000;
|
|
SET_HIGH_WORD (x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
|
|
k += (i >> 20);
|
|
dk = (double) k;
|
|
f = x - 1.0;
|
|
if ((0x000fffff & (2 + hx)) < 3)
|
|
{ /* |f| < 2**-20 */
|
|
if (f == zero)
|
|
{
|
|
if (FIX_INT_FP_CONVERT_ZERO && dk == 0.0)
|
|
dk = 0.0;
|
|
return dk;
|
|
}
|
|
R = f * f * (0.5 - 0.33333333333333333 * f);
|
|
return dk - (R - f) / ln2;
|
|
}
|
|
s = f / (2.0 + f);
|
|
z = s * s;
|
|
i = hx - 0x6147a;
|
|
w = z * z;
|
|
j = 0x6b851 - hx;
|
|
t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
|
|
t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
|
|
i |= j;
|
|
R = t2 + t1;
|
|
if (i > 0)
|
|
{
|
|
hfsq = 0.5 * f * f;
|
|
return dk - ((hfsq - (s * (hfsq + R))) - f) / ln2;
|
|
}
|
|
else
|
|
{
|
|
return dk - ((s * (f - R)) - f) / ln2;
|
|
}
|
|
}
|
|
|
|
/* NOTE(review): strong_alias is defined outside this file; presumably
   this makes __log2_finite a second name for __ieee754_log2 -- confirm
   against the macro's definition.  */
strong_alias (__ieee754_log2, __log2_finite)
|