mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-02 09:40:13 +00:00
f1c56cdff0
SET_RESTORE_ROUND brackets a block of code, temporarily setting and restoring the rounding mode and letting everything else, including exceptions generated within the block, pass through. On powerpc, the current code clears the exception enables, which will hide exceptions generated within the block. This issue was introduced by me in commit e905212627.
Fix this by not clearing exception enable bits in the prologue. Also, since we are no longer changing the enable bits in either the prologue or the epilogue, there is no need to test for entering/exiting non-stop mode. Also, optimize the prologue get/save/set rounding mode operations for POWER9 and later by using 'mffscrn' when possible. Suggested-by: Paul E. Murphy <murphyp@linux.ibm.com> Reviewed-by: Paul E. Murphy <murphyp@linux.ibm.com> Fixes: e905212627
2019-09-19 Paul A. Clarke <pc@us.ibm.com> * sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_and_set_rn): New. (__fe_mffscrn): New. * sysdeps/powerpc/fpu/fenv_private.h (libc_feholdsetround_ppc_ctx): Do not clear enable bits, remove obsolete code, use fegetenv_and_set_rn. (libc_feresetround_ppc): Remove obsolete code, use fegetenv_and_set_rn.
286 lines
10 KiB
C
286 lines
10 KiB
C
/* Internal libc stuff for floating point environment routines.
|
|
Copyright (C) 1997-2019 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<https://www.gnu.org/licenses/>. */
|
|
|
|
#ifndef _FENV_LIBC_H
|
|
#define _FENV_LIBC_H 1
|
|
|
|
#include <fenv.h>
|
|
#include <ldsodefs.h>
|
|
#include <sysdep.h>
|
|
|
|
/* Returns a pointer to a constant fenv_t.  Only declared here; the
   definition lives outside this header.  NOTE(review): judging by the
   name this is presumably the environment with exceptions unmasked --
   confirm against the definition.  */
extern const fenv_t *__fe_nomask_env_priv (void);
|
|
|
|
/* Returns a pointer to a constant fenv_t; the symbol is declared with
   attribute_hidden.  NOTE(review): presumably the environment with
   exceptions masked -- confirm against the definition.  */
extern const fenv_t *__fe_mask_env (void) attribute_hidden;
|
|
|
|
/* The sticky bits in the FPSCR indicating exceptions have occurred:
   every bit of FE_ALL_EXCEPT and FE_ALL_INVALID, with FE_INVALID itself
   masked out.  */
|
|
#define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID)
|
|
|
|
/* Equivalent to fegetenv, but returns a fenv_t instead of taking a
   pointer.  Expands directly to the compiler builtin __builtin_mffs.  */
|
|
#define fegetenv_register() __builtin_mffs()
|
|
|
|
/* Like fegetenv_register, but only the status, exception-enable and
   mode bits of the returned value are meaningful.

   The macro always emits 'mffsl'.  Conveniently, on architectures older
   than "power9" that encoding decodes as plain 'mffs', because the extra
   bits set for 'mffsl' are "don't care" bits for 'mffs'; and what 'mffs'
   returns is a superset of what 'mffsl' returns.  */
#define fegetenv_status() \
  ({ \
    register double __fpscr; \
    __asm__ __volatile__ ( \
      ".machine push; .machine \"power9\"; mffsl %0; .machine pop" \
      : "=f" (__fpscr)); \
    __fpscr; \
  })
|
|
|
|
/* Issue 'mffscrn' (or its immediate form 'mffscrni') with RN as the
   rounding-mode operand and yield, as a fenv_t, the value the
   instruction leaves in its output register.

   When RN is a compile-time constant it is passed as an immediate
   ("i" constraint) to 'mffscrni'.  Otherwise RN is first stored into
   the integer view of a fenv_union_t so that it can be handed to
   'mffscrn' in a floating-point register.  */
#define __fe_mffscrn(rn) \
  ({ \
    register fenv_union_t __env; \
    if (!__builtin_constant_p (rn)) \
      { \
        __env.l = (rn); \
        __asm__ __volatile__ ( \
          ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
          : "=f" (__env.fenv) : "f" (__env.fenv)); \
      } \
    else \
      __asm__ __volatile__ ( \
        ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
        : "=f" (__env.fenv) : "i" (rn)); \
    __env.fenv; \
  })
|
|
|
|
/* Like fegetenv_status, but also sets the rounding mode to RN.  */
#ifdef _ARCH_PWR9
/* Built for POWER9 or later: 'mffscrn' does the whole job.  */
# define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
#else
/* On ARCH < 3_00 'mffscrn' decodes as 'mffs'.  Executing it is still
   necessary (it fetches the environment) but not sufficient, because
   it leaves the rounding mode untouched.  So when the running hardware
   does not report PPC_FEATURE2_ARCH_3_00, set the rounding mode
   explicitly afterwards.  */
# define fegetenv_and_set_rn(rn) \
  ({ \
    register fenv_union_t __saved; \
    __saved.fenv = __fe_mffscrn (rn); \
    if (__glibc_unlikely ((GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0)) \
      __fesetround_inline (rn); \
    __saved.fenv; \
  })
#endif
|
|
|
|
/* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.

   ENV is evaluated exactly once and copied into a local double.  When
   the hardware capabilities report PPC_FEATURE_HAS_DFP the four-operand
   'mtfsf' form is used (assembled under ".machine power6"); otherwise
   the value is written with __builtin_mtfsf.

   The temporary carries a '__' prefix, like the temporaries of the
   other macros in this file, so that an ENV expression mentioning a
   caller variable called 'd' is not captured by the macro's own local
   (which would turn the initialisation into 'double d = (d);').  */
#define fesetenv_register(env) \
  do { \
    double __d = (env); \
    if (GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \
      asm volatile (".machine push; " \
                    ".machine \"power6\"; " \
                    "mtfsf 0xff,%0,1,0; " \
                    ".machine pop" : : "f" (__d)); \
    else \
      __builtin_mtfsf (0xff, __d); \
  } while (0)
|
|
|
|
/* Set the last 2 nibbles of the FPSCR, which contain the
   exception enables and the rounding mode.
   'fegetenv_status' retrieves these bits by reading the FPSCR.

   The expansion intentionally carries no trailing semicolon: the
   caller's own ';' terminates the statement, so the macro can be used
   as the body of an 'if' that has an 'else' branch.  */
#define fesetenv_mode(env) __builtin_mtfsf (0b00000011, (env))
|
|
|
|
/* This very handy macro:
   - Sets the rounding mode to 'round to nearest';
   - Sets the processor into IEEE mode; and
   - Prevents exceptions from being raised for inexact results.
   These things happen to be exactly what you need for typical elementary
   functions.

   The plain 'mtfsfi 7,0' is always executed.  When the hardware
   capabilities report PPC_FEATURE_HAS_DFP, the three-operand form
   'mtfsfi 7,0,1' (assembled under ".machine power6") is executed
   first.  */
#define relax_fenv_state() \
  do \
    { \
      if ((GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) != 0) \
        { \
          asm volatile (".machine push; .machine \"power6\"; " \
                        "mtfsfi 7,0,1; .machine pop"); \
        } \
      asm volatile ("mtfsfi 7,0"); \
    } \
  while (0)
|
|
|
|
/* Set/clear a particular FPSCR bit with 'mtfsb1'/'mtfsb0'.  X is passed
   with an "i" (immediate) constraint, so it must be a compile-time
   constant bit number.  For instance,
	reset_fpscr_bit(FPSCR_VE);
   prevents INVALID exceptions from being raised.  */
|
|
#define set_fpscr_bit(x) asm volatile ("mtfsb1 %0" : : "i"(x))
|
|
#define reset_fpscr_bit(x) asm volatile ("mtfsb0 %0" : : "i"(x))
|
|
|
|
/* Two views of the same storage: the floating-point environment as a
   fenv_t, and the same bits as a 64-bit unsigned integer.  The macros
   and helpers in this file use it to move values between the two
   representations.  */
typedef union
{
  fenv_t fenv;			/* Environment as a fenv_t.  */
  unsigned long long l;		/* The same bits as an integer.  */
} fenv_union_t;
|
|
|
|
|
|
static inline int
|
|
__fesetround_inline (int round)
|
|
{
|
|
if ((unsigned int) round < 2)
|
|
{
|
|
asm volatile ("mtfsb0 30");
|
|
if ((unsigned int) round == 0)
|
|
asm volatile ("mtfsb0 31");
|
|
else
|
|
asm volatile ("mtfsb1 31");
|
|
}
|
|
else
|
|
{
|
|
asm volatile ("mtfsb1 30");
|
|
if ((unsigned int) round == 2)
|
|
asm volatile ("mtfsb0 31");
|
|
else
|
|
asm volatile ("mtfsb1 31");
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Same as __fesetround_inline, however without runtime check to use DFP
|
|
mtfsfi syntax (as relax_fenv_state) or if round value is valid. */
|
|
static inline void
|
|
__fesetround_inline_nocheck (const int round)
|
|
{
|
|
asm volatile ("mtfsfi 7,%0" : : "i" (round));
|
|
}
|
|
|
|
/* Mask with only FPSCR bit number BIT set.  Bit 0 maps to 1 << 31 and
   bit 31 maps to 1 << 0.  The result has type int.
   NOTE(review): FPSCR_MASK (0) shifts 1 into the sign bit of int.
   Switching to an unsigned constant would change how '~mask' widens
   when it is combined with 64-bit values, so audit every user before
   changing the type.  */
#define FPSCR_MASK(bit) (1 << (31 - (bit)))
|
|
|
|
/* Definitions of all the FPSCR bit numbers.

   The enumerators count up from FPSCR_FX = 0 to FPSCR_RN_lo = 31.
   Each one (except the reserved bit 20) is followed by a matching
   FPSCR_<name>_MASK macro that wraps the bit number in FPSCR_MASK.
   When _ARCH_PWR6 is defined, bit 29 is named FPSCR_29 instead of
   FPSCR_NI; FPSCR_NI_MASK is defined either way.  */
enum {
  FPSCR_FX = 0,    /* exception summary */
#define FPSCR_FX_MASK (FPSCR_MASK (FPSCR_FX))
  FPSCR_FEX,       /* enabled exception summary */
#define FPSCR_FEX_MASK (FPSCR_MASK (FPSCR_FEX))
  FPSCR_VX,        /* invalid operation summary */
#define FPSCR_VX_MASK (FPSCR_MASK (FPSCR_VX))
  FPSCR_OX,        /* overflow */
#define FPSCR_OX_MASK (FPSCR_MASK (FPSCR_OX))
  FPSCR_UX,        /* underflow */
#define FPSCR_UX_MASK (FPSCR_MASK (FPSCR_UX))
  FPSCR_ZX,        /* zero divide */
#define FPSCR_ZX_MASK (FPSCR_MASK (FPSCR_ZX))
  FPSCR_XX,        /* inexact */
#define FPSCR_XX_MASK (FPSCR_MASK (FPSCR_XX))
  FPSCR_VXSNAN,    /* invalid operation for sNaN */
#define FPSCR_VXSNAN_MASK (FPSCR_MASK (FPSCR_VXSNAN))
  FPSCR_VXISI,     /* invalid operation for Inf-Inf */
#define FPSCR_VXISI_MASK (FPSCR_MASK (FPSCR_VXISI))
  FPSCR_VXIDI,     /* invalid operation for Inf/Inf */
#define FPSCR_VXIDI_MASK (FPSCR_MASK (FPSCR_VXIDI))
  FPSCR_VXZDZ,     /* invalid operation for 0/0 */
#define FPSCR_VXZDZ_MASK (FPSCR_MASK (FPSCR_VXZDZ))
  FPSCR_VXIMZ,     /* invalid operation for Inf*0 */
#define FPSCR_VXIMZ_MASK (FPSCR_MASK (FPSCR_VXIMZ))
  FPSCR_VXVC,      /* invalid operation for invalid compare */
#define FPSCR_VXVC_MASK (FPSCR_MASK (FPSCR_VXVC))
  FPSCR_FR,        /* fraction rounded [fraction was incremented by round] */
#define FPSCR_FR_MASK (FPSCR_MASK (FPSCR_FR))
  FPSCR_FI,        /* fraction inexact */
#define FPSCR_FI_MASK (FPSCR_MASK (FPSCR_FI))
  FPSCR_FPRF_C,    /* result class descriptor */
#define FPSCR_FPRF_C_MASK (FPSCR_MASK (FPSCR_FPRF_C))
  FPSCR_FPRF_FL,   /* result less than (usually, less than 0) */
#define FPSCR_FPRF_FL_MASK (FPSCR_MASK (FPSCR_FPRF_FL))
  FPSCR_FPRF_FG,   /* result greater than */
#define FPSCR_FPRF_FG_MASK (FPSCR_MASK (FPSCR_FPRF_FG))
  FPSCR_FPRF_FE,   /* result equal to */
#define FPSCR_FPRF_FE_MASK (FPSCR_MASK (FPSCR_FPRF_FE))
  FPSCR_FPRF_FU,   /* result unordered */
#define FPSCR_FPRF_FU_MASK (FPSCR_MASK (FPSCR_FPRF_FU))
  FPSCR_20,        /* reserved */
  FPSCR_VXSOFT,    /* invalid operation set by software */
#define FPSCR_VXSOFT_MASK (FPSCR_MASK (FPSCR_VXSOFT))
  FPSCR_VXSQRT,    /* invalid operation for square root */
#define FPSCR_VXSQRT_MASK (FPSCR_MASK (FPSCR_VXSQRT))
  FPSCR_VXCVI,     /* invalid operation for invalid integer convert */
#define FPSCR_VXCVI_MASK (FPSCR_MASK (FPSCR_VXCVI))
  FPSCR_VE,        /* invalid operation exception enable */
#define FPSCR_VE_MASK (FPSCR_MASK (FPSCR_VE))
  FPSCR_OE,        /* overflow exception enable */
#define FPSCR_OE_MASK (FPSCR_MASK (FPSCR_OE))
  FPSCR_UE,        /* underflow exception enable */
#define FPSCR_UE_MASK (FPSCR_MASK (FPSCR_UE))
  FPSCR_ZE,        /* zero divide exception enable */
#define FPSCR_ZE_MASK (FPSCR_MASK (FPSCR_ZE))
  FPSCR_XE,        /* inexact exception enable */
#define FPSCR_XE_MASK (FPSCR_MASK (FPSCR_XE))
#ifdef _ARCH_PWR6
  FPSCR_29,        /* Reserved in ISA 2.05  */
#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_29))
#else
  FPSCR_NI,        /* non-IEEE mode (typically, no denormalised numbers) */
#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_NI))
#endif /* _ARCH_PWR6 */
  /* the remaining two least-significant bits keep the rounding mode */
  FPSCR_RN_hi,
#define FPSCR_RN_hi_MASK (FPSCR_MASK (FPSCR_RN_hi))
  FPSCR_RN_lo
#define FPSCR_RN_lo_MASK (FPSCR_MASK (FPSCR_RN_lo))
};
|
|
|
|
/* Composite FPSCR masks, each the bitwise OR of single-bit masks.  */

/* Both rounding-mode bits.  */
#define FPSCR_RN_MASK \
  (FPSCR_RN_hi_MASK | FPSCR_RN_lo_MASK)

/* The five exception-enable bits.  */
#define FPSCR_ENABLES_MASK \
  (FPSCR_VE_MASK | FPSCR_OE_MASK | FPSCR_UE_MASK | FPSCR_ZE_MASK \
   | FPSCR_XE_MASK)

/* The five exception bits VX, OX, UX, ZX and XX.  */
#define FPSCR_BASIC_EXCEPTIONS_MASK \
  (FPSCR_VX_MASK | FPSCR_OX_MASK | FPSCR_UX_MASK | FPSCR_ZX_MASK \
   | FPSCR_XX_MASK)

/* The five FPRF bits.  */
#define FPSCR_FPRF_MASK \
  (FPSCR_FPRF_C_MASK | FPSCR_FPRF_FL_MASK | FPSCR_FPRF_FG_MASK \
   | FPSCR_FPRF_FE_MASK | FPSCR_FPRF_FU_MASK)

/* Control bits: exception enables, NI and rounding mode.  */
#define FPSCR_CONTROL_MASK \
  (FPSCR_ENABLES_MASK | FPSCR_NI_MASK | FPSCR_RN_MASK)

/* Status bits: FR, FI and FPRF.  */
#define FPSCR_STATUS_MASK \
  (FPSCR_FR_MASK | FPSCR_FI_MASK | FPSCR_FPRF_MASK)
|
|
|
|
/* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
   in the FPSCR, albeit shifted to different but corresponding locations.
   Similarly, the exception indicator bits in the FPSCR correspond one-to-one
   with the exception enable bits.  It is thus possible to map the FENV(1)
   exceptions directly to the FPSCR enables with a simple mask and shift,
   and vice versa.

   The enable masks FPSCR_VE_MASK .. FPSCR_XE_MASK are 1 << 7 down to
   1 << 3, so shifting them left by this amount yields 1 << 29 down to
   1 << 25.  */
|
|
#define FPSCR_EXCEPT_TO_ENABLE_SHIFT 22
|
|
|
|
static inline int
|
|
fenv_reg_to_exceptions (unsigned long long l)
|
|
{
|
|
return (((int)l) & FPSCR_ENABLES_MASK) << FPSCR_EXCEPT_TO_ENABLE_SHIFT;
|
|
}
|
|
|
|
static inline unsigned long long
|
|
fenv_exceptions_to_reg (int excepts)
|
|
{
|
|
return (unsigned long long)
|
|
(excepts & FE_ALL_EXCEPT) >> FPSCR_EXCEPT_TO_ENABLE_SHIFT;
|
|
}
|
|
|
|
#ifdef _ARCH_PWR6
|
|
/* Not supported in ISA 2.05.  Provided for source compat only: when
   _ARCH_PWR6 is defined the FPSCR bit enum names bit 29 FPSCR_29
   rather than FPSCR_NI, so FPSCR_NI is supplied here as the plain
   bit number.  */
|
|
# define FPSCR_NI 29
|
|
#endif /* _ARCH_PWR6 */
|
|
|
|
/* This operation (i) sets the appropriate FPSCR bits for its
   parameter, (ii) converts sNaN to the corresponding qNaN, and (iii)
   otherwise passes its parameter through unchanged (in particular, -0
   and +0 stay as they were).  The `obvious' way to do this is optimised
   out by gcc, hence the explicit 'fmul' by 1.0 in volatile asm.

   The result temporary carries a '__' prefix, like the temporaries of
   the other macros in this file, so that an argument expression that
   mentions a caller variable called 'd' is not captured by the macro's
   own (uninitialised) local.  */
#define f_wash(x) \
  ({ \
    double __d; \
    asm volatile ("fmul %0,%1,%2" \
                  : "=f" (__d) \
                  : "f" (x), "f" ((float) 1.0)); \
    __d; \
  })
|
|
/* Single-precision counterpart of f_wash: multiplies X by 1.0 with
   'fmuls' in volatile asm and yields the float result.

   The result temporary carries a '__' prefix, like the temporaries of
   the other macros in this file, so that an argument expression that
   mentions a caller variable called 'f' is not captured by the macro's
   own (uninitialised) local.  */
#define f_washf(x) \
  ({ \
    float __f; \
    asm volatile ("fmuls %0,%1,%2" \
                  : "=f" (__f) \
                  : "f" (x), "f" ((float) 1.0)); \
    __f; \
  })
|
|
|
|
#endif /* fenv_libc.h */
|