glibc/sysdeps/powerpc/fpu/fenv_private.h
Paul A. Clarke e905212627 [powerpc] SET_RESTORE_ROUND improvements
SET_RESTORE_ROUND uses libc_feholdsetround_ppc_ctx and
libc_feresetround_ppc_ctx to bracket a block of code where the floating point
rounding mode must be set to a certain value.

For the *prologue*, libc_feholdsetround_ppc_ctx is used and performs:
1. Read/save FPSCR.
2. Create new value for FPSCR with new rounding mode and enables cleared.
3. If new value is different than current value,
   a. If transitioning from a state where some exceptions enabled,
      enter "ignore exceptions / non-stop" mode.
   b. Write new value to FPSCR.
   c. Put a mark on the wall indicating the FPSCR was changed.

(1) uses the 'mffs' instruction.  On POWER9, the lighter weight 'mffsl'
instruction can be used, but it doesn't return all of the bits in the FPSCR.
fegetenv_status uses 'mffsl' on POWER9, 'mffs' otherwise, and can thus be
used instead of fegetenv_register.
(3b) uses 'mtfsf 0b11111111' to write the entire FPSCR, so it must
instead use 'mtfsf 0b00000011' to write just the enables and the mode,
because some of the rest of the bits are not valid if 'mffsl' was used.
fesetenv_mode uses 'mtfsf 0b00000011' on POWER9, 'mtfsf 0b11111111'
otherwise.

For the *epilogue*, libc_feresetround_ppc_ctx checks the mark on the wall, then
calls libc_feresetround_ppc, which just calls __libc_femergeenv_ppc with
parameters such that it performs:
1. Retrieve saved value of FPSCR, saved in prologue above.
2. Read FPSCR.
3. Create new value of FPSCR where:
   - Summary bits and exception indicators = current OR saved.
   - Rounding mode and enables = saved.
   - Status bits = current.
4. If transitioning from some exceptions enabled to none,
   enter "ignore exceptions / non-stop" mode.
5. If transitioning from no exceptions enabled to some,
   enter "catch exceptions" mode.
6. Write new value to FPSCR.

The summary bits are hardwired to the exception indicators, so there is no
need to restore any saved summary bits.
The exception indicator bits, which are sticky and remain set unless
explicitly cleared, would only need to be restored if the code block
might explicitly clear any of them.  This is certainly not expected.

So, the only bits that need to be restored are the enables and the mode.
If it is the case that only those bits are to be restored, there is no need to
read the FPSCR.  Steps (2) and (3) are unnecessary, and step (6) only needs to
write the bits being restored.

We know we are transitioning out of "ignore exceptions" mode, so step (4) is
unnecessary, and in step (6), we only need to check the state we are
entering.
2019-08-28 13:51:10 -05:00

265 lines
8.3 KiB
C

/* Private floating point rounding and exceptions handling. PowerPC version.
Copyright (C) 2013-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef POWERPC_FENV_PRIVATE_H
#define POWERPC_FENV_PRIVATE_H 1
#include <fenv.h>
#include <fenv_libc.h>
#include <fpu_control.h>
/* Union of the individual exception enable bits; a non-zero AND with this
value means at least one exception is enabled. */
#define _FPU_ALL_TRAPS (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \
| _FPU_MASK_XM | _FPU_MASK_IM)
/* AND-mask that clears the rounding mode bits (the two low-order bits) and
keeps everything else. */
#define _FPU_MASK_RN 0xfffffffffffffffcLL
/* AND-mask that clears everything in the low 32 bits except the rounding
mode and non-IEEE arithmetic flags (only the bits in 0x00000807 survive);
the high 32 bits are kept. */
#define _FPU_MASK_NOT_RN_NI 0xffffffff00000807LL
/* AND-mask that clears the low byte, i.e. the exception enables and the
rounding mode, so that those can be replaced or restored. */
#define _FPU_MASK_TRAPS_RN 0xffffffffffffff00LL
/* AND-mask that clears the FP result flags (bits in 0x0007f000) while
preserving, among the rest, the fraction rounded/inexact bits. */
#define _FPU_MASK_FRAC_INEX_RET_CC 0xfffffffffff80fffLL
/* Snapshot the environment returned by fegetenv_register () into *ENVP and
   then install a derived one: the snapshot AND'ed with MASK, with BITS
   OR'ed in.  */
static __always_inline void
__libc_feholdbits_ppc (fenv_t *envp, unsigned long long mask,
		       unsigned long long bits)
{
  fenv_union_t saved;
  fenv_union_t wanted;

  /* Hand the caller the untouched environment before altering anything.  */
  saved.fenv = fegetenv_register ();
  *envp = saved.fenv;

  wanted.l = bits | (saved.l & mask);

  /* When any exception enable was set on entry, mask SIGFPE in the MSR
     FE0/FE1 bits first.  The FPU then always takes the default action,
     cannot generate SIGFPE, and may run faster.  */
  const int traps_were_enabled = (saved.l & _FPU_ALL_TRAPS) != 0;
  if (traps_were_enabled)
    (void) __fe_mask_env ();

  fesetenv_register (wanted.fenv);
}
/* Save the current environment in *ENVP and install one that keeps only
   the bits selected by _FPU_MASK_NOT_RN_NI, with nothing OR'ed in.  */
static __always_inline void
libc_feholdexcept_ppc (fenv_t *envp)
{
  const unsigned long long keep = _FPU_MASK_NOT_RN_NI;
  const unsigned long long extra = 0ULL;

  __libc_feholdbits_ppc (envp, keep, extra);
}
/* Save the current environment in *ENVP and install one that keeps only
   the bits selected by both _FPU_MASK_NOT_RN_NI and _FPU_MASK_RN, with R
   OR'ed in as the new rounding mode.  */
static __always_inline void
libc_feholdexcept_setround_ppc (fenv_t *envp, int r)
{
  /* Dropping the rounding mode bits from the selection lets R alone
     decide the new rounding mode.  */
  const unsigned long long keep = _FPU_MASK_RN & _FPU_MASK_NOT_RN_NI;

  __libc_feholdbits_ppc (envp, keep, r);
}
/* Set the rounding mode to R by forwarding to __fesetround_inline, which is
defined elsewhere (presumably R is one of the FE_* rounding mode values --
confirm against that helper). */
static __always_inline void
libc_fesetround_ppc (int r)
{
__fesetround_inline (r);
}
static __always_inline int
libc_fetestexcept_ppc (int e)
{
fenv_union_t u;
u.fenv = fegetenv_register ();
return u.l & e;
}
/* Save the current environment in *E, then install one in which the bits
   removed by _FPU_MASK_TRAPS_RN are cleared and R is OR'ed in as the
   rounding mode.  */
static __always_inline void
libc_feholdsetround_ppc (fenv_t *e, int r)
{
  const unsigned long long keep = _FPU_MASK_TRAPS_RN;

  __libc_feholdbits_ppc (e, keep, r);
}
/* Install a blend of the current environment and *ENVP: the bits of the
   current one selected by OLD_MASK, OR'ed with the bits of *ENVP selected
   by NEW_MASK.  Returns the environment as it was before the update.  */
static __always_inline unsigned long long
__libc_femergeenv_ppc (const fenv_t *envp, unsigned long long old_mask,
		       unsigned long long new_mask)
{
  fenv_union_t incoming;
  fenv_union_t current;
  fenv_union_t merged;

  incoming.fenv = *envp;
  current.fenv = fegetenv_register ();

  /* Each mask drops the unwanted bits from its side before combining.  */
  merged.l = (current.l & old_mask) | (incoming.l & new_mask);

  const int had_traps = (current.l & _FPU_ALL_TRAPS) != 0;
  const int wants_traps = (merged.l & _FPU_ALL_TRAPS) != 0;

  /* Going from no enabled exceptions to some: unmask SIGFPE in the MSR
     FE0/FE1 bits.  This puts the hardware into "precise mode" and may make
     the FPU slower on some hardware.
     Going from some enabled exceptions to none: mask SIGFPE in the MSR
     FE0/FE1 bits.  The FPU then always takes the default action, cannot
     generate SIGFPE, and may run faster.  */
  if (!had_traps && wants_traps)
    (void) __fe_nomask_env_priv ();
  else if (had_traps && !wants_traps)
    (void) __fe_mask_env ();

  /* Atomically enable and raise (if appropriate) exceptions set in
     `merged'.  */
  fesetenv_register (merged.fenv);

  return current.l;
}
/* Make *ENVP the current environment, replacing it entirely.  */
static __always_inline void
libc_fesetenv_ppc (const fenv_t *envp)
{
  /* An all-zero mask for the current state and an all-ones mask for *ENVP
     turn the merge into a plain replacement.  The previous environment
     returned by the merge is not needed.  */
  (void) __libc_femergeenv_ppc (envp, 0ULL, ~0ULL);
}
/* Re-install the environment saved in *ENVP through fesetenv_mode.  Only
   the saved value is examined here; the current environment is not read
   back.  */
static __always_inline void
libc_feresetround_ppc (fenv_t *envp)
{
  fenv_union_t saved;

  saved.fenv = *envp;

  /* If the environment being restored has any exception enabled, unmask
     SIGFPE in the MSR FE0/FE1 bits.  This puts the hardware into "precise
     mode" and may make the FPU slower on some hardware.  */
  const int wants_traps = (saved.l & _FPU_ALL_TRAPS) != 0;
  if (wants_traps)
    (void) __fe_nomask_env_priv ();

  fesetenv_mode (saved.fenv);
}
/* Merge *ENVP into the current environment, keeping from the current one
   the bits selected by _FPU_MASK_TRAPS_RN and taking from *ENVP the bits
   selected by _FPU_MASK_FRAC_INEX_RET_CC.  Returns the bits of EX that
   were set in the environment as it was before the merge.  */
static __always_inline int
libc_feupdateenv_test_ppc (fenv_t *envp, int ex)
{
  const unsigned long long before
    = __libc_femergeenv_ppc (envp, _FPU_MASK_TRAPS_RN,
			     _FPU_MASK_FRAC_INEX_RET_CC);

  return before & ex;
}
/* Merge *E into the current environment; see libc_feupdateenv_test_ppc for
   which bits come from where.  */
static __always_inline void
libc_feupdateenv_ppc (fenv_t *e)
{
  /* Testing against 0 always yields 0, so the result is discarded; only
     the merge itself is wanted.  */
  (void) libc_feupdateenv_test_ppc (e, 0);
}
/* Bind the libc_* fenv hook names to the PowerPC implementations above.
The plain and `f'-suffixed names share one implementation. */
#define libc_feholdexceptf libc_feholdexcept_ppc
#define libc_feholdexcept libc_feholdexcept_ppc
#define libc_feholdexcept_setroundf libc_feholdexcept_setround_ppc
#define libc_feholdexcept_setround libc_feholdexcept_setround_ppc
#define libc_fetestexceptf libc_fetestexcept_ppc
#define libc_fetestexcept libc_fetestexcept_ppc
#define libc_fesetroundf libc_fesetround_ppc
#define libc_fesetround libc_fesetround_ppc
#define libc_fesetenvf libc_fesetenv_ppc
#define libc_fesetenv libc_fesetenv_ppc
#define libc_feupdateenv_testf libc_feupdateenv_test_ppc
#define libc_feupdateenv_test libc_feupdateenv_test_ppc
#define libc_feupdateenvf libc_feupdateenv_ppc
#define libc_feupdateenv libc_feupdateenv_ppc
#define libc_feholdsetroundf libc_feholdsetround_ppc
#define libc_feholdsetround libc_feholdsetround_ppc
#define libc_feresetroundf libc_feresetround_ppc
#define libc_feresetround libc_feresetround_ppc
/* We have support for rounding mode context. */
#define HAVE_RM_CTX 1
/* Prologue of a rounding mode context.  Records the environment returned
   by fegetenv_status () in CTX->env.  If clearing the FPSCR_ENABLES_MASK
   and FPSCR_RN_MASK bits and OR'ing in R yields a different value, that
   value is installed with fesetenv_mode.  CTX->updated_status tells the
   matching epilogue whether anything was changed.  */
static __always_inline void
libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
{
  fenv_union_t cur;
  fenv_union_t want;

  cur.fenv = fegetenv_status ();
  want.l = (cur.l & ~(FPSCR_ENABLES_MASK|FPSCR_RN_MASK)) | r;

  ctx->env = cur.fenv;

  const int must_write = (want.l != cur.l);
  if (__glibc_unlikely (must_write))
    {
      /* Leaving a state with exceptions enabled: mask SIGFPE before the
	 new value goes in.  */
      if ((cur.l & _FPU_ALL_TRAPS) != 0)
	(void) __fe_mask_env ();
      fesetenv_mode (want.fenv);
    }

  ctx->updated_status = must_write;
}
/* Prologue of a rounding mode context, "noex" flavour.  Same steps as
   libc_feholdsetround_ppc_ctx, except that the environment is read with
   fegetenv_register () and written with fesetenv_register ().  */
static __always_inline void
libc_feholdsetround_noex_ppc_ctx (struct rm_ctx *ctx, int r)
{
  fenv_union_t cur;
  fenv_union_t want;

  cur.fenv = fegetenv_register ();
  want.l = (cur.l & ~(FPSCR_ENABLES_MASK|FPSCR_RN_MASK)) | r;

  ctx->env = cur.fenv;

  const int must_write = (want.l != cur.l);
  if (__glibc_unlikely (must_write))
    {
      /* Leaving a state with exceptions enabled: mask SIGFPE before the
	 new value goes in.  */
      if ((cur.l & _FPU_ALL_TRAPS) != 0)
	(void) __fe_mask_env ();
      fesetenv_register (want.fenv);
    }

  ctx->updated_status = must_write;
}
static __always_inline void
libc_fesetenv_ppc_ctx (struct rm_ctx *ctx)
{
libc_fesetenv_ppc (&ctx->env);
}
static __always_inline void
libc_feupdateenv_ppc_ctx (struct rm_ctx *ctx)
{
if (__glibc_unlikely (ctx->updated_status))
libc_feresetround_ppc (&ctx->env);
}
static __always_inline void
libc_feresetround_ppc_ctx (struct rm_ctx *ctx)
{
if (__glibc_unlikely (ctx->updated_status))
libc_feresetround_ppc (&ctx->env);
}
/* Bind the libc_*_ctx rounding mode context hook names to the PowerPC
implementations above.  The plain, `f'- and `l'-suffixed names share one
implementation. */
#define libc_fesetenv_ctx libc_fesetenv_ppc_ctx
#define libc_fesetenvf_ctx libc_fesetenv_ppc_ctx
#define libc_fesetenvl_ctx libc_fesetenv_ppc_ctx
#define libc_feholdsetround_ctx libc_feholdsetround_ppc_ctx
#define libc_feholdsetroundf_ctx libc_feholdsetround_ppc_ctx
#define libc_feholdsetroundl_ctx libc_feholdsetround_ppc_ctx
#define libc_feholdsetround_noex_ctx libc_feholdsetround_noex_ppc_ctx
#define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_ppc_ctx
#define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_ppc_ctx
#define libc_feresetround_ctx libc_feresetround_ppc_ctx
#define libc_feresetroundf_ctx libc_feresetround_ppc_ctx
#define libc_feresetroundl_ctx libc_feresetround_ppc_ctx
#define libc_feupdateenv_ctx libc_feupdateenv_ppc_ctx
#define libc_feupdateenvf_ctx libc_feupdateenv_ppc_ctx
#define libc_feupdateenvl_ctx libc_feupdateenv_ppc_ctx
#include_next <fenv_private.h>
#endif