glibc/sysdeps/ieee754/flt-32/s_cosf.c
Adhemerval Zanella da39afa4ff i686: Use generic cosf implementation for SSE2 version
Performance seems to be similar (gcc 11.2.1 on a Ryzen 9 5900X):

* s_cosf-sse2.S:
  "cosf": {
   "workload-random": {
    "duration": 3.74987e+09,
    "iterations": 9.616e+07,
    "reciprocal-throughput": 15.8141,
    "latency": 62.1782,
    "max-throughput": 6.32346e+07,
    "min-throughput": 1.60828e+07
   }
  }

* generic s_cosf.c:
  "cosf": {
   "workload-random": {
    "duration": 3.87298e+09,
    "iterations": 1.00968e+08,
    "reciprocal-throughput": 18.3448,
    "latency": 58.3722,
    "max-throughput": 5.45113e+07,
    "min-throughput": 1.71314e+07
   }
  }

Checked on i686-linux-gnu.
2022-06-01 10:47:44 -03:00

93 lines
2.3 KiB
C

/* Compute cosine of argument.
Copyright (C) 2018-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <stdint.h>
#include <math.h>
#include <math-barriers.h>
#include <libm-alias-float.h>
#include "math_config.h"
#include "s_sincosf.h"
#ifndef SECTION
# define SECTION
#endif
#ifndef COSF
# define COSF_FUNC __cosf
#else
# define COSF_FUNC COSF
#endif
/* Fast cosf implementation. Worst-case ULP is 0.5607, maximum relative
error is 0.5303 * 2^-23. A single-step range reduction is used for
small values. Large inputs have their range reduced using fast integer
arithmetic.
*/
float
SECTION
COSF_FUNC (float y)
{
double x = y;
double s;
int n;
const sincos_t *p = &__sincosf_table[0];
if (abstop12 (y) < abstop12 (pio4))
{
double x2 = x * x;
if (__glibc_unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
return 1.0f;
return sinf_poly (x, x2, p, 1);
}
else if (__glibc_likely (abstop12 (y) < abstop12 (120.0f)))
{
x = reduce_fast (x, p, &n);
/* Setup the signs for sin and cos. */
s = p->sign[n & 3];
if (n & 2)
p = &__sincosf_table[1];
return sinf_poly (x * s, x * x, p, n ^ 1);
}
else if (abstop12 (y) < abstop12 (INFINITY))
{
uint32_t xi = asuint (y);
int sign = xi >> 31;
x = reduce_large (xi, &n);
/* Setup signs for sin and cos - include original sign. */
s = p->sign[(n + sign) & 3];
if ((n + sign) & 2)
p = &__sincosf_table[1];
return sinf_poly (x * s, x * x, p, n ^ 1);
}
else
return __math_invalidf (y);
}
#ifndef COSF
libm_alias_float (__cos, cos)
#endif