glibc/sysdeps/aarch64/fpu/coshf_sve.c
Joe Ramsay 7b8c134b54 AArch64: Improve codegen in SVE expf & related routines
Reduce MOV and MOVPRFX by improving special-case handling.  Use inline
helper to duplicate the entire computation between the special- and
non-special case branches, removing the contention for z0 between x
and the return value.

Also rearrange some MLAs and MLSs - by making the multiplicand the
destination we can avoid a MOVPRFX in several cases.  Also change which
constants go in the vector used for lanewise ops - the last lane is no
longer wasted.

Spotted that shift was incorrect in exp2f and exp10f, w.r.t. to the
comment that explains it.  Fixed - worst-case ULP for exp2f moves
around but it doesn't change significantly for either routine.

Worst-case error for coshf increases due to passing x to exp rather
than abs(x) - updated the comment, but does not require regen-ulps.

Reviewed-by: Wilco Dijkstra  <Wilco.Dijkstra@arm.com>
2024-09-23 15:44:07 +01:00

65 lines
2.3 KiB
C

/* Single-precision vector (SVE) cosh function
Copyright (C) 2024 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
#include "sv_expf_inline.h"
static const struct data
{
struct sv_expf_data expf_consts;
float special_bound;
} data = {
.expf_consts = SV_EXPF_DATA,
/* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */
.special_bound = 0x1.5a92d8p+6,
};
static svfloat32_t NOINLINE
special_case (svfloat32_t x, svfloat32_t half_e, svfloat32_t half_over_e,
svbool_t pg)
{
return sv_call_f32 (coshf, x, svadd_x (svptrue_b32 (), half_e, half_over_e),
pg);
}
/* Single-precision vector cosh, using vector expf.
Maximum error is 2.77 ULP:
_ZGVsMxv_coshf(-0x1.5b38f4p+1) got 0x1.e45946p+2
want 0x1.e4594cp+2. */
svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t special = svacge (pg, x, d->special_bound);
/* Calculate cosh by exp(x) / 2 + exp(-x) / 2.
Note that x is passed to exp here, rather than |x|. This is to avoid using
destructive unary ABS for better register usage. However it means the
routine is not exactly symmetrical, as the exp helper is slightly less
accurate in the negative range. */
svfloat32_t e = expf_inline (x, pg, &d->expf_consts);
svfloat32_t half_e = svmul_x (svptrue_b32 (), e, 0.5);
svfloat32_t half_over_e = svdivr_x (pg, e, 0.5);
if (__glibc_unlikely (svptest_any (pg, special)))
return special_case (x, half_e, half_over_e, special);
return svadd_x (svptrue_b32 (), half_e, half_over_e);
}