replace tgammaf by the CORE-MATH implementation

The CORE-MATH implementation is correctly rounded (for any rounding mode).
This can be checked by exhaustive tests in a few minutes since there are
less than 2^32 values to check against for example GNU MPFR.
This patch also adds some bench values for tgammaf.

Tested on x86_64 and x86 (cfarm26).

With the initial GNU libc code it gave on an Intel(R) Core(TM) i7-8700:

      "tgammaf": {
       "": {
        "duration": 3.50188e+09,
        "iterations": 2e+07,
        "max": 602.891,
        "min": 65.1415,
        "mean": 175.094
       }
      }

With the new code:

      "tgammaf": {
       "": {
        "duration": 3.30825e+09,
        "iterations": 5e+07,
        "max": 211.592,
        "min": 32.0325,
        "mean": 66.1649
       }
      }

With the initial GNU libc code it gave on cfarm26 (i686):

  "tgammaf": {
   "": {
    "duration": 3.70505e+09,
    "iterations": 6e+06,
    "max": 2420.23,
    "min": 243.154,
    "mean": 617.509
   }
  }

With the new code:

  "tgammaf": {
   "": {
    "duration": 3.24497e+09,
    "iterations": 1.8e+07,
    "max": 1238.15,
    "min": 101.155,
    "mean": 180.276
   }
  }

Signed-off-by: Alexei Sibidanov <sibid@uvic.ca>
Signed-off-by: Paul Zimmermann <Paul.Zimmermann@inria.fr>

Changes in v2:
    - include <math.h> (fix the linknamespace failures)
    - restored original benchtests/strcoll-inputs/filelist#en_US.UTF-8 file
    - restored original wrapper code (math/w_tgammaf_compat.c),
      except for the dealing with the sign
    - removed the tgammaf/float entries in all libm-test-ulps files
    - address other comments from Joseph Myers
      (https://sourceware.org/pipermail/libc-alpha/2024-July/158736.html)

Changes in v3:
    - pass NULL argument for signgam from w_tgammaf_compat.c
    - use of math_narrow_eval
    - added more comments

Changes in v4:
    - initialize local_signgam to 0 in math/w_tgamma_template.c
    - replace sysdeps/ieee754/dbl-64/gamma_productf.c by dummy file

Changes in v5:
    - do not mention local_signgam any more in math/w_tgammaf_compat.c
    - initialize local_signgam to 1 instead of 0 in w_tgamma_template.c
      and added comment

Changes in v6:
    - pass NULL as 2nd argument of __ieee754_gammaf_r in
      w_tgammaf_compat.c, and check for NULL in e_gammaf_r.c

Changes in v7:
    - added Signed-off-by line for Alexei Sibidanov (author of the code)

Changes in v8:
    - added Signed-off-by line for Paul Zimmermann (submitted of the patch)

Changes in v9:
    - address comments from review by Adhemerval Zanella
Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
This commit is contained in:
Paul Zimmermann 2024-10-08 11:28:04 +02:00
parent e850abd8d8
commit 392b3f0971
33 changed files with 1151 additions and 334 deletions

View File

@ -219,3 +219,15 @@ tzdata:
timezone/leapseconds timezone/leapseconds
# This is yearistype.sh in the parent project # This is yearistype.sh in the parent project
timezone/yearistype timezone/yearistype
# The following files are shared with the CORE-MATH project
sysdeps/ieee754/flt-32/e_gammaf_r.c
(file src/binary32/tgamma/tgammaf.c in CORE-MATH)
Instructions to merge new versions:
- change the function name from cr_tgammaf to __ieee754_gammaf_r
- add "int *signgamp" as 2nd argument and add at the beginning:
if (signgamp != NULL) *signgamp = 1;
- remove the errno stuff (this is done by the wrapper)
- replace 0x1p127f * 0x1p127f by math_narrow_eval (x * 0x1p127f)
- replace 0x1p-127f * sgn[k&1] by math_narrow_eval (0x1p-127f * sgn[k&1])
- add libm_alias_finite (__ieee754_gammaf_r, __gammaf_r) at the end

View File

@ -94,6 +94,7 @@ bench-math := \
tan \ tan \
tanh \ tanh \
tgamma \ tgamma \
tgammaf \
trunc \ trunc \
truncf \ truncf \
y0 \ y0 \

1006
benchtests/tgammaf-inputs Normal file

File diff suppressed because it is too large Load Diff

View File

@ -14,6 +14,7 @@
#include <errno.h> #include <errno.h>
#include <math.h> #include <math.h>
#include <stddef.h>
#include <math_private.h> #include <math_private.h>
#include <math-svid-compat.h> #include <math-svid-compat.h>
#include <libm-alias-float.h> #include <libm-alias-float.h>
@ -22,8 +23,7 @@
float float
__tgammaf(float x) __tgammaf(float x)
{ {
int local_signgam; float y = __ieee754_gammaf_r(x, NULL);
float y = __ieee754_gammaf_r(x,&local_signgam);
if(__glibc_unlikely (!isfinite (y) || y == 0) if(__glibc_unlikely (!isfinite (y) || y == 0)
&& (isfinite (x) || (isinf (x) && x < 0.0)) && (isfinite (x) || (isinf (x) && x < 0.0))
@ -41,7 +41,7 @@ __tgammaf(float x)
/* tgammaf overflow */ /* tgammaf overflow */
return __kernel_standard_f(x, x, 140); return __kernel_standard_f(x, x, 140);
} }
return local_signgam < 0 ? - y : y; return y;
} }
libm_alias_float (__tgamma, tgamma) libm_alias_float (__tgamma, tgamma)
#endif #endif

View File

@ -1653,22 +1653,18 @@ ldouble: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "y0": Function: "y0":

View File

@ -1410,22 +1410,18 @@ ldouble: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "y0": Function: "y0":

View File

@ -1157,19 +1157,15 @@ float: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 9
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 9
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 8
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 9
Function: "y0": Function: "y0":
double: 3 double: 3

View File

@ -279,7 +279,6 @@ float: 2
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
Function: "y0": Function: "y0":
double: 3 double: 3

View File

@ -1152,19 +1152,15 @@ float: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
Function: "y0": Function: "y0":
double: 3 double: 3

View File

@ -1061,19 +1061,15 @@ float: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
Function: "tgamma_downward": Function: "tgamma_downward":
double: 8 double: 8
float: 7
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
Function: "y0": Function: "y0":
double: 3 double: 3

View File

@ -1092,19 +1092,15 @@ float: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
Function: "tgamma_downward": Function: "tgamma_downward":
double: 5 double: 5
float: 5
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 5 double: 5
float: 4
Function: "tgamma_upward": Function: "tgamma_upward":
double: 4 double: 4
float: 4
Function: "y0": Function: "y0":
double: 3 double: 3

View File

@ -1181,20 +1181,16 @@ float: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
ldouble: 1 ldouble: 1
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
Function: "y0": Function: "y0":
double: 3 double: 3

View File

@ -1699,25 +1699,21 @@ ldouble: 4
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
float128: 4 float128: 4
ldouble: 5 ldouble: 5
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
float128: 5 float128: 5
ldouble: 6 ldouble: 6
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
float128: 5 float128: 5
ldouble: 6 ldouble: 6
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
float128: 4 float128: 4
ldouble: 5 ldouble: 5

View File

@ -1701,25 +1701,21 @@ ldouble: 4
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
float128: 4 float128: 4
ldouble: 5 ldouble: 5
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
float128: 5 float128: 5
ldouble: 6 ldouble: 6
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
float128: 5 float128: 5
ldouble: 6 ldouble: 6
Function: "tgamma_upward": Function: "tgamma_upward":
double: 8 double: 8
float: 8
float128: 4 float128: 4
ldouble: 5 ldouble: 5

View File

@ -1,44 +1 @@
/* Compute a product of X, X+1, ..., with an error estimate. /* Not needed. */
Copyright (C) 2013-2024 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <math.h>
#include <math-narrow-eval.h>
#include <math_private.h>
#include <float.h>
/* Compute the product of X + X_EPS, X + X_EPS + 1, ..., X + X_EPS + N
- 1, in the form R * (1 + *EPS) where the return value R is an
approximation to the product and *EPS is set to indicate the
approximate error in the return value. X is such that all the
values X + 1, ..., X + N - 1 are exactly representable, and X_EPS /
X is small enough that factors quadratic in it can be
neglected. */
float
__gamma_productf (float x, float x_eps, int n, float *eps)
{
double x_full = (double) x + (double) x_eps;
double ret = x_full;
for (int i = 1; i < n; i++)
ret *= x_full + i;
float fret = math_narrow_eval ((float) ret);
*eps = (ret - fret) / fret;
return fret;
}

View File

@ -1,215 +1,150 @@
/* Implementation of gamma function according to ISO C. /* Implementation of the gamma function for binary32.
Copyright (C) 1997-2024 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or Copyright (c) 2023-2024 Alexei Sibidanov.
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful, The original version of this file was copied from the CORE-MATH
but WITHOUT ANY WARRANTY; without even the implied warranty of project (file src/binary32/tgamma/tgammaf.c, revision a48e352).
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public Permission is hereby granted, free of charge, to any person obtaining a copy
License along with the GNU C Library; if not, see of this software and associated documentation files (the "Software"), to deal
<https://www.gnu.org/licenses/>. */ in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
/* Changes with respect to the original CORE-MATH code:
- removed the dealing with errno
(this is done in the wrapper math/w_tgammaf_compat.c)
- usage of math_narrow_eval to deal with underflow/overflow
- deal with signgamp
*/
#include <math.h> #include <math.h>
#include <math-narrow-eval.h>
#include <math_private.h>
#include <fenv_private.h>
#include <math-underflow.h>
#include <float.h> #include <float.h>
#include <stdint.h>
#include <stddef.h>
#include <libm-alias-finite.h> #include <libm-alias-finite.h>
#include <math-narrow-eval.h>
/* Coefficients B_2k / 2k(2k-1) of x^-(2k-1) inside exp in Stirling's typedef union {float f; uint32_t u;} b32u32_u;
approximation to gamma function. */ typedef union {double f; uint64_t u;} b64u64_u;
static const float gamma_coeff[] =
{
0x1.555556p-4f,
-0xb.60b61p-12f,
0x3.403404p-12f,
};
#define NCOEFF (sizeof (gamma_coeff) / sizeof (gamma_coeff[0]))
/* Return gamma (X), for positive X less than 42, in the form R *
2^(*EXP2_ADJ), where R is the return value and *EXP2_ADJ is set to
avoid overflow or underflow in intermediate calculations. */
static float
gammaf_positive (float x, int *exp2_adj)
{
int local_signgam;
if (x < 0.5f)
{
*exp2_adj = 0;
return __ieee754_expf (__ieee754_lgammaf_r (x + 1, &local_signgam)) / x;
}
else if (x <= 1.5f)
{
*exp2_adj = 0;
return __ieee754_expf (__ieee754_lgammaf_r (x, &local_signgam));
}
else if (x < 2.5f)
{
*exp2_adj = 0;
float x_adj = x - 1;
return (__ieee754_expf (__ieee754_lgammaf_r (x_adj, &local_signgam))
* x_adj);
}
else
{
float eps = 0;
float x_eps = 0;
float x_adj = x;
float prod = 1;
if (x < 4.0f)
{
/* Adjust into the range for applying Stirling's
approximation. */
float n = ceilf (4.0f - x);
x_adj = math_narrow_eval (x + n);
x_eps = (x - (x_adj - n));
prod = __gamma_productf (x_adj - n, x_eps, n, &eps);
}
/* The result is now gamma (X_ADJ + X_EPS) / (PROD * (1 + EPS)).
Compute gamma (X_ADJ + X_EPS) using Stirling's approximation,
starting by computing pow (X_ADJ, X_ADJ) with a power of 2
factored out. */
float exp_adj = -eps;
float x_adj_int = roundf (x_adj);
float x_adj_frac = x_adj - x_adj_int;
int x_adj_log2;
float x_adj_mant = __frexpf (x_adj, &x_adj_log2);
if (x_adj_mant < M_SQRT1_2f)
{
x_adj_log2--;
x_adj_mant *= 2.0f;
}
*exp2_adj = x_adj_log2 * (int) x_adj_int;
float ret = (__ieee754_powf (x_adj_mant, x_adj)
* __ieee754_exp2f (x_adj_log2 * x_adj_frac)
* __ieee754_expf (-x_adj)
* sqrtf (2 * M_PIf / x_adj)
/ prod);
exp_adj += x_eps * __ieee754_logf (x_adj);
float bsum = gamma_coeff[NCOEFF - 1];
float x_adj2 = x_adj * x_adj;
for (size_t i = 1; i <= NCOEFF - 1; i++)
bsum = bsum / x_adj2 + gamma_coeff[NCOEFF - 1 - i];
exp_adj += bsum / x_adj;
return ret + ret * __expm1f (exp_adj);
}
}
float float
__ieee754_gammaf_r (float x, int *signgamp) __ieee754_gammaf_r (float x, int *signgamp)
{ {
int32_t hx; /* The wrapper in math/w_tgamma_template.c expects *signgamp to be set to a
float ret; non-negative value if the returned value is gamma(x), and to a negative
value if it is -gamma(x).
Since the code here directly computes gamma(x), we set it to 1.
*/
if (signgamp != NULL)
*signgamp = 1;
GET_FLOAT_WORD (hx, x); /* List of exceptional cases. Each entry contains the 32-bit encoding u of x,
a binary32 approximation f of gamma(x), and a correction term df. */
static const struct {uint32_t u; float f, df;} tb[] = {
{0x27de86a9u, 0x1.268266p+47f, 0x1p22f}, // x = 0x1.bd0d52p-48
{0x27e05475u, 0x1.242422p+47f, 0x1p22f}, // x = 0x1.c0a8eap-48
{0xb63befb3u, -0x1.5cb6e4p+18f, 0x1p-7f}, // x = -0x1.77df66p-19
{0x3c7bb570u, 0x1.021d9p+6f, 0x1p-19f}, // x = 0x1.f76aep-7
{0x41e886d1u, 0x1.33136ap+98f, 0x1p73f}, // x = 0x1.d10da2p+4
{0xc067d177u, 0x1.f6850cp-3f, 0x1p-28f}, // x = -0x1.cfa2eep+1
{0xbd99da31u, -0x1.befe66p+3, -0x1p-22f}, // x = -0x1.33b462p-4
{0xbf54c45au, -0x1.a6b4ecp+2, +0x1p-23f}, // x = -0x1.a988b4p-1
{0x41ee77feu, 0x1.d3631cp+101, -0x1p-76f}, // x = 0x1.dceffcp+4
{0x3f843a64u, 0x1.f6c638p-1, 0x1p-26f}, // x = 0x1.0874c8p+0
};
if (__glibc_unlikely ((hx & 0x7fffffff) == 0)) b32u32_u t = {.f = x};
{ uint32_t ax = t.u<<1;
/* Return value for x == 0 is Inf with divide by zero exception. */ if(__builtin_expect(ax>=(0xffu<<24), 0)){ /* x=NaN or +/-Inf */
*signgamp = 0; if(ax==(0xffu<<24)){ /* x=+/-Inf */
return 1.0 / x; if(t.u>>31){ /* x=-Inf */
return x / x; /* will raise the "Invalid operation" exception */
}
return x; /* x=+Inf */
} }
if (__builtin_expect (hx < 0, 0) return x + x; /* x=NaN, where x+x ensures the "Invalid operation"
&& (uint32_t) hx < 0xff800000 && rintf (x) == x) exception is set if x is sNaN */
{ }
/* Return value for integer x < 0 is NaN with invalid exception. */ double z = x;
*signgamp = 0; if(__builtin_expect(ax<0x6d000000u, 0)){ /* |x| < 0x1p-18 */
return (x - x) / (x - x); volatile double d = (0x1.fa658c23b1578p-1 - 0x1.d0a118f324b63p-1*z)*z - 0x1.2788cfc6fb619p-1;
double f = 1.0/z + d;
float r = f;
b64u64_u rt = {.f = f};
if(((rt.u+2)&0xfffffff) < 4){
for(unsigned i=0;i<sizeof(tb)/sizeof(tb[0]);i++)
if(t.u==tb[i].u) return tb[i].f + tb[i].df;
} }
if (__glibc_unlikely (hx == 0xff800000)) return r;
{ }
/* x == -Inf. According to ISO this is NaN. */ float fx = __builtin_floorf(x);
*signgamp = 0; if(__builtin_expect(x >= 0x1.18522p+5f, 0)){
return x - x; /* Overflow case. The original CORE-MATH code returns 0x1p127f * 0x1p127f,
but apparently some compilers replace this by +Inf. */
return math_narrow_eval (x * 0x1p127f);
}
/* compute k only after the overflow check, otherwise the case to integer
might overflow */
int k = fx;
if(__builtin_expect(fx==x, 0)){ /* x is integer */
if(x == 0.0f){
return 1.0f/x;
} }
if (__glibc_unlikely ((hx & 0x7f800000) == 0x7f800000)) if(x < 0.0f){
{ return 0.0f / 0.0f; /* should raise the "Invalid operation" exception */
/* Positive infinity (return positive infinity) or NaN (return
NaN). */
*signgamp = 0;
return x + x;
} }
double t0 = 1, x0 = 1;
for(int i=1; i<k; i++, x0 += 1.0) t0 *= x0;
return t0;
}
if(__builtin_expect(x<-42.0f, 0)){ /* negative non-integer */
/* For x < -42, x non-integer, |gamma(x)| < 2^-151. */
static const float sgn[2] = {0x1p-127f, -0x1p-127f};
/* Underflows always happens */
return math_narrow_eval (0x1p-127f * sgn[k&1]);
}
/* The array c[] stores a degree-15 polynomial approximation for gamma(x). */
static const double c[] =
{0x1.c9a76be577123p+0, 0x1.8f2754ddcf90dp+0, 0x1.0d1191949419bp+0, 0x1.e1f42cf0ae4a1p-2,
0x1.82b358a3ab638p-3, 0x1.e1f2b30cd907bp-5, 0x1.240f6d4071bd8p-6, 0x1.1522c9f3cd012p-8,
0x1.1fd0051a0525bp-10, 0x1.9808a8b96c37ep-13, 0x1.b3f78e01152b5p-15, 0x1.49c85a7e1fd04p-18,
0x1.471ca49184475p-19, -0x1.368f0b7ed9e36p-23, 0x1.882222f9049efp-23, -0x1.a69ed2042842cp-25};
if (x >= 36.0f) double m = z - 0x1.7p+1, i = __builtin_roundeven(m), step = __builtin_copysign(1.0,i);
{ double d = m - i, d2 = d*d, d4 = d2*d2, d8 = d4*d4;
/* Overflow. */ double f = (c[0] + d*c[1]) + d2*(c[2] + d*c[3]) + d4*((c[4] + d*c[5]) + d2*(c[6] + d*c[7]))
*signgamp = 0; + d8*((c[8] + d*c[9]) + d2*(c[10] + d*c[11]) + d4*((c[12] + d*c[13]) + d2*(c[14] + d*c[15])));
ret = math_narrow_eval (FLT_MAX * FLT_MAX); int jm = __builtin_fabs(i);
return ret; double w = 1;
if(jm){
z -= 0.5 + step*0.5;
w = z;
for(int j=jm-1; j; j--) {z -= step; w *= z;}
}
if(i<=-0.5) w = 1/w;
f *= w;
b64u64_u rt = {.f = f};
float r = f;
/* Deal with exceptional cases. */
if(__builtin_expect(((rt.u+2)&0xfffffff) < 8, 0)){
for(unsigned j=0;j<sizeof(tb)/sizeof(tb[0]);j++) {
if(t.u==tb[j].u) return tb[j].f + tb[j].df;
} }
else }
{ return r;
SET_RESTORE_ROUNDF (FE_TONEAREST);
if (x > 0.0f)
{
*signgamp = 0;
int exp2_adj;
float tret = gammaf_positive (x, &exp2_adj);
ret = __scalbnf (tret, exp2_adj);
}
else if (x >= -FLT_EPSILON / 4.0f)
{
*signgamp = 0;
ret = 1.0f / x;
}
else
{
float tx = truncf (x);
*signgamp = (tx == 2.0f * truncf (tx / 2.0f)) ? -1 : 1;
if (x <= -42.0f)
/* Underflow. */
ret = FLT_MIN * FLT_MIN;
else
{
float frac = tx - x;
if (frac > 0.5f)
frac = 1.0f - frac;
float sinpix = (frac <= 0.25f
? __sinf (M_PIf * frac)
: __cosf (M_PIf * (0.5f - frac)));
int exp2_adj;
float tret = M_PIf / (-x * sinpix
* gammaf_positive (-x, &exp2_adj));
ret = __scalbnf (tret, -exp2_adj);
math_check_force_underflow_nonneg (ret);
}
}
ret = math_narrow_eval (ret);
}
if (isinf (ret) && x != 0)
{
if (*signgamp < 0)
{
ret = math_narrow_eval (-copysignf (FLT_MAX, ret) * FLT_MAX);
ret = -ret;
}
else
ret = math_narrow_eval (copysignf (FLT_MAX, ret) * FLT_MAX);
return ret;
}
else if (ret == 0)
{
if (*signgamp < 0)
{
ret = math_narrow_eval (-copysignf (FLT_MIN, ret) * FLT_MIN);
ret = -ret;
}
else
ret = math_narrow_eval (copysignf (FLT_MIN, ret) * FLT_MIN);
return ret;
}
else
return ret;
} }
libm_alias_finite (__ieee754_gammaf_r, __gammaf_r) libm_alias_finite (__ieee754_gammaf_r, __gammaf_r)

View File

@ -1432,22 +1432,18 @@ ldouble: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "y0": Function: "y0":

View File

@ -146,7 +146,6 @@ double: 1
Function: "tgamma": Function: "tgamma":
double: 1 double: 1
float: 1
Function: "y0": Function: "y0":
double: 2 double: 2

View File

@ -1208,22 +1208,18 @@ float: 1
Function: "tgamma": Function: "tgamma":
double: 3 double: 3
float: 9
ldouble: 9 ldouble: 9
Function: "tgamma_downward": Function: "tgamma_downward":
double: 3 double: 3
float: 9
ldouble: 9 ldouble: 9
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 3 double: 3
float: 9
ldouble: 9 ldouble: 9
Function: "tgamma_upward": Function: "tgamma_upward":
double: 2 double: 2
float: 9
ldouble: 9 ldouble: 9
Function: "y0": Function: "y0":

View File

@ -257,7 +257,6 @@ float: 2
Function: "tgamma": Function: "tgamma":
double: 5 double: 5
float: 4
Function: "y0": Function: "y0":
double: 2 double: 2

View File

@ -1156,19 +1156,15 @@ float: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
Function: "y0": Function: "y0":
double: 3 double: 3

View File

@ -1444,22 +1444,18 @@ ldouble: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "y0": Function: "y0":

View File

@ -266,7 +266,6 @@ float: 2
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
Function: "y0": Function: "y0":
double: 3 double: 3

View File

@ -1066,19 +1066,15 @@ float: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 9
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 8
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
Function: "y0": Function: "y0":
double: 3 double: 3

View File

@ -1064,19 +1064,15 @@ float: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 9
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 8
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
Function: "y0": Function: "y0":
double: 3 double: 3

View File

@ -1828,25 +1828,21 @@ ldouble: 6
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
float128: 4 float128: 4
ldouble: 5 ldouble: 5
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
float128: 5 float128: 5
ldouble: 6 ldouble: 6
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
float128: 5 float128: 5
ldouble: 5 ldouble: 5
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
float128: 4 float128: 4
ldouble: 5 ldouble: 5

View File

@ -1560,22 +1560,18 @@ ldouble: 6
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
ldouble: 5 ldouble: 5
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "y0": Function: "y0":

View File

@ -1361,22 +1361,18 @@ ldouble: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "tgamma_downward": Function: "tgamma_downward":
double: 5 double: 5
float: 5
ldouble: 5 ldouble: 5
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 5 double: 5
float: 4
ldouble: 5 ldouble: 5
Function: "tgamma_upward": Function: "tgamma_upward":
double: 4 double: 4
float: 4
ldouble: 4 ldouble: 4
Function: "y0": Function: "y0":

View File

@ -1431,22 +1431,18 @@ ldouble: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_upward": Function: "tgamma_upward":
double: 8 double: 8
float: 8
ldouble: 4 ldouble: 4
Function: "y0": Function: "y0":

View File

@ -1429,22 +1429,18 @@ ldouble: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "y0": Function: "y0":

View File

@ -532,11 +532,9 @@ float: 2
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
Function: "y0": Function: "y0":
double: 3 double: 3

View File

@ -1444,22 +1444,18 @@ ldouble: 3
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
ldouble: 5 ldouble: 5
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
ldouble: 4 ldouble: 4
Function: "y0": Function: "y0":

View File

@ -2263,25 +2263,21 @@ double: 1
Function: "tgamma": Function: "tgamma":
double: 9 double: 9
float: 8
float128: 4 float128: 4
ldouble: 5 ldouble: 5
Function: "tgamma_downward": Function: "tgamma_downward":
double: 9 double: 9
float: 7
float128: 5 float128: 5
ldouble: 6 ldouble: 6
Function: "tgamma_towardzero": Function: "tgamma_towardzero":
double: 9 double: 9
float: 7
float128: 5 float128: 5
ldouble: 6 ldouble: 6
Function: "tgamma_upward": Function: "tgamma_upward":
double: 9 double: 9
float: 8
float128: 4 float128: 4
ldouble: 5 ldouble: 5