glibc/sysdeps/ieee754/flt-32/s_cosf.c
Wilco Dijkstra 599cf39766 Improve performance of sinf and cosf
The second patch improves performance of sinf and cosf using the same
algorithms and polynomials.  The returned values are identical to sincosf
for the same input.  ULP definitions for AArch64 and x64 are updated.

sinf/cosf througput gains on Cortex-A72:
* |x| < 0x1p-12 : 1.2x
* |x| < M_PI_4  : 1.8x
* |x| < 2 * M_PI: 1.7x
* |x| < 120.0   : 2.3x
* |x| < Inf     : 3.0x

	* NEWS: Mention sinf, cosf, sincosf.
	* sysdeps/aarch64/libm-test-ulps: Update ULP for sinf, cosf, sincosf.
	* sysdeps/x86_64/fpu/libm-test-ulps: Update ULP for sinf and cosf.
	* sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c: Add definitions of
	constants rather than including generic sincosf.h.
	* sysdeps/x86_64/fpu/s_sincosf_data.c: Remove.
	* sysdeps/ieee754/flt-32/s_cosf.c (cosf): Rewrite.
	* sysdeps/ieee754/flt-32/s_sincosf.h (reduced_sin): Remove.
	(reduced_cos): Remove.
	(sinf_poly): New function.
	* sysdeps/ieee754/flt-32/s_sinf.c (sinf): Rewrite.
2018-08-14 10:45:59 +01:00

88 lines
2.2 KiB
C

/* Compute cosine of argument.
Copyright (C) 2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <stdint.h>
#include <math.h>
#include <math-barriers.h>
#include <libm-alias-float.h>
#include "math_config.h"
#include "s_sincosf.h"
#ifndef COSF
# define COSF_FUNC __cosf
#else
# define COSF_FUNC COSF
#endif
/* Fast cosf implementation. Worst-case ULP is 0.5607, maximum relative
error is 0.5303 * 2^-23. A single-step range reduction is used for
small values. Large inputs have their range reduced using fast integer
arithmetic.
*/
float
COSF_FUNC (float y)
{
double x = y;
double s;
int n;
const sincos_t *p = &__sincosf_table[0];
if (abstop12 (y) < abstop12 (pio4))
{
double x2 = x * x;
if (__glibc_unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
return 1.0f;
return sinf_poly (x, x2, p, 1);
}
else if (__glibc_likely (abstop12 (y) < abstop12 (120.0f)))
{
x = reduce_fast (x, p, &n);
/* Setup the signs for sin and cos. */
s = p->sign[n & 3];
if (n & 2)
p = &__sincosf_table[1];
return sinf_poly (x * s, x * x, p, n ^ 1);
}
else if (abstop12 (y) < abstop12 (INFINITY))
{
uint32_t xi = asuint (y);
int sign = xi >> 31;
x = reduce_large (xi, &n);
/* Setup signs for sin and cos - include original sign. */
s = p->sign[(n + sign) & 3];
if ((n + sign) & 2)
p = &__sincosf_table[1];
return sinf_poly (x * s, x * x, p, n ^ 1);
}
else
return __math_invalidf (y);
}
#ifndef COSF
libm_alias_float (__cos, cos)
#endif