glibc/sysdeps/powerpc/fpu/math_private.h
Adhemerval Zanella 41e8926aa4 PowerPC: Set/restore rounding mode only when needed
This patch improves the performance of some math functions by adding libc_fexxx
variants of the inline functions that handle both FPU rounding-mode and exception
set/restore, and by using them in the libc_fexxx_ctx functions. It is based on the
already-coded fexxx family of functions for PPC with FPU.

Here is the summary of performance improvements due to this patch (measured on a
POWER7 machine):

Before:

cos(): ITERS:9.5895e+07: TOTAL:5116.03Mcy, MAX:77.6cy, MIN:49.792cy, 18744 calls/Mcy
exp(): ITERS:2.827e+07: TOTAL:5187.15Mcy, MAX:494.018cy, MIN:38.422cy, 5450.01 calls/Mcy
pow(): ITERS:6.1705e+07: TOTAL:5144.26Mcy, MAX:171.95cy, MIN:29.935cy, 11994.9 calls/Mcy
sin(): ITERS:8.6898e+07: TOTAL:5117.06Mcy, MAX:83.841cy, MIN:46.582cy, 16982 calls/Mcy
tan(): ITERS:2.9473e+07: TOTAL:5115.39Mcy, MAX:191.017cy, MIN:172.352cy, 5761.63 calls/Mcy

After:

cos(): ITERS:2.05265e+08: TOTAL:5111.37Mcy, MAX:78.754cy, MIN:24.196cy, 40158.5 calls/Mcy
exp(): ITERS:3.341e+07: TOTAL:5170.84Mcy, MAX:476.317cy, MIN:15.574cy, 6461.23 calls/Mcy
pow(): ITERS:7.6153e+07: TOTAL:5129.1Mcy, MAX:147.5cy, MIN:30.916cy, 14847.2 calls/Mcy
sin(): ITERS:1.58816e+08: TOTAL:5115.11Mcy, MAX:1490.39cy, MIN:22.341cy, 31048.4 calls/Mcy
tan(): ITERS:3.4964e+07: TOTAL:5114.18Mcy, MAX:177.422cy, MIN:146.115cy, 6836.68 calls/Mcy
2013-11-25 06:34:41 -06:00

180 lines
3.9 KiB
C

/* Private inline math functions for powerpc.
Copyright (C) 2006-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#ifndef _PPC_MATH_PRIVATE_H_
#define _PPC_MATH_PRIVATE_H_
#include <sysdep.h>
#include <ldsodefs.h>
#include <dl-procinfo.h>
#include <fenv_private.h>
#include_next <math_private.h>
/* __CPU_HAS_FSQRT is the condition the sqrt helpers below test before
   issuing fsqrt/fsqrts.  For 64-bit builds, or when _ARCH_PWR4 is defined,
   it is the compile-time constant 1; otherwise it is a run-time test of the
   PPC_FEATURE_64 bit in the dl_hwcap word accessed through GLRO.
   NOTE(review): the run-time form presumes every CPU that reports
   PPC_FEATURE_64 implements fsqrt -- confirm against the supported CPUs.  */
# if __WORDSIZE == 64 || defined _ARCH_PWR4
# define __CPU_HAS_FSQRT 1
# else
# define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
# endif
/* Out-of-line fallback used when the fsqrt instruction cannot be used.  */
extern double __slow_ieee754_sqrt (double);

/* Double-precision square root of __x.  Issues a single fsqrt instruction
   when __CPU_HAS_FSQRT holds, and otherwise defers to
   __slow_ieee754_sqrt.  */
extern __always_inline double
__ieee754_sqrt (double __x)
{
  double __root;

  /* No usable hardware square root: take the out-of-line path.  */
  if (!__CPU_HAS_FSQRT)
    return __slow_ieee754_sqrt (__x);

  /* The asm has to be volatile so that the compiler cannot hoist the
     fsqrt instruction above the capability check.  */
  __asm __volatile ("fsqrt %0,%1" : "=f" (__root) : "f" (__x));
  return __root;
}
/* Out-of-line fallback used when the fsqrts instruction cannot be used.  */
extern float __slow_ieee754_sqrtf (float);

/* Single-precision square root of __x.  Issues a single fsqrts instruction
   when __CPU_HAS_FSQRT holds, and otherwise defers to
   __slow_ieee754_sqrtf.  */
extern __always_inline float
__ieee754_sqrtf (float __x)
{
  float __root;

  /* No usable hardware square root: take the out-of-line path.  */
  if (!__CPU_HAS_FSQRT)
    return __slow_ieee754_sqrtf (__x);

  /* The asm has to be volatile so that the compiler cannot hoist the
     fsqrts instruction above the capability check.  */
  __asm __volatile ("fsqrts %0,%1" : "=f" (__root) : "f" (__x));
  return __root;
}
#if defined _ARCH_PWR5X
/* __round(x): statement expression yielding a double that holds the result
   of the frin instruction (Power ISA "Floating Round to Integer Nearest")
   applied to x.  Defined only if no __round macro exists yet.  x appears
   once in the expansion, so it is evaluated exactly once; it must not
   mention an identifier named __z, which would bind to the macro's own
   temporary.  */
# ifndef __round
# define __round(x) \
({ double __z; \
__asm __volatile ( \
" frin %0,%1\n" \
: "=f" (__z) \
: "f" (x)); \
__z; })
# endif
/* __roundf(x): float counterpart of __round.  Applies frin (Power ISA
   "Floating Round to Integer Nearest") to x and then frsp ("Floating Round
   to Single-Precision") to that result in place, yielding a float.
   Defined only if no __roundf macro exists yet.  x is evaluated exactly
   once and must not mention an identifier named __z, which would bind to
   the macro's own temporary.  */
# ifndef __roundf
# define __roundf(x) \
({ float __z; \
__asm __volatile ( \
" frin %0,%1\n" \
" frsp %0,%0\n" \
: "=f" (__z) \
: "f" (x)); \
__z; })
# endif
/* __trunc(x): statement expression yielding a double that holds the result
   of the friz instruction (Power ISA "Floating Round to Integer Toward
   Zero") applied to x.  Defined only if no __trunc macro exists yet.  x is
   evaluated exactly once and must not mention an identifier named __z,
   which would bind to the macro's own temporary.  */
# ifndef __trunc
# define __trunc(x) \
({ double __z; \
__asm __volatile ( \
" friz %0,%1\n" \
: "=f" (__z) \
: "f" (x)); \
__z; })
# endif
/* __truncf(x): float counterpart of __trunc.  Applies friz (Power ISA
   "Floating Round to Integer Toward Zero") to x and then frsp ("Floating
   Round to Single-Precision") to that result in place, yielding a float.
   Defined only if no __truncf macro exists yet.  x is evaluated exactly
   once and must not mention an identifier named __z, which would bind to
   the macro's own temporary.  */
# ifndef __truncf
# define __truncf(x) \
({ float __z; \
__asm __volatile ( \
" friz %0,%1\n" \
" frsp %0,%0\n" \
: "=f" (__z) \
: "f" (x)); \
__z; })
# endif
/* __ceil(x): statement expression yielding a double that holds the result
   of the frip instruction (Power ISA "Floating Round to Integer Plus",
   i.e. toward +infinity) applied to x.  Defined only if no __ceil macro
   exists yet.  x is evaluated exactly once and must not mention an
   identifier named __z, which would bind to the macro's own temporary.  */
# ifndef __ceil
# define __ceil(x) \
({ double __z; \
__asm __volatile ( \
" frip %0,%1\n" \
: "=f" (__z) \
: "f" (x)); \
__z; })
# endif
/* __ceilf(x): float counterpart of __ceil.  Applies frip (Power ISA
   "Floating Round to Integer Plus", i.e. toward +infinity) to x and then
   frsp ("Floating Round to Single-Precision") to that result in place,
   yielding a float.  Defined only if no __ceilf macro exists yet.  x is
   evaluated exactly once and must not mention an identifier named __z,
   which would bind to the macro's own temporary.  */
# ifndef __ceilf
# define __ceilf(x) \
({ float __z; \
__asm __volatile ( \
" frip %0,%1\n" \
" frsp %0,%0\n" \
: "=f" (__z) \
: "f" (x)); \
__z; })
# endif
/* __floor(x): statement expression yielding a double that holds the result
   of the frim instruction (Power ISA "Floating Round to Integer Minus",
   i.e. toward -infinity) applied to x.  Defined only if no __floor macro
   exists yet.  x is evaluated exactly once and must not mention an
   identifier named __z, which would bind to the macro's own temporary.  */
# ifndef __floor
# define __floor(x) \
({ double __z; \
__asm __volatile ( \
" frim %0,%1\n" \
: "=f" (__z) \
: "f" (x)); \
__z; })
# endif
/* __floorf(x): float counterpart of __floor.  Applies frim (Power ISA
   "Floating Round to Integer Minus", i.e. toward -infinity) to x and then
   frsp ("Floating Round to Single-Precision") to that result in place,
   yielding a float.  Defined only if no __floorf macro exists yet.  x is
   evaluated exactly once and must not mention an identifier named __z,
   which would bind to the macro's own temporary.  */
# ifndef __floorf
# define __floorf(x) \
({ float __z; \
__asm __volatile ( \
" frim %0,%1\n" \
" frsp %0,%0\n" \
: "=f" (__z) \
: "f" (x)); \
__z; })
# endif
#endif /* defined _ARCH_PWR5X */
#if defined _ARCH_PWR6
/* __copysign(x, y): statement expression yielding a double produced by the
   fcpsgn instruction (Power ISA "Floating Copy Sign").  The asm passes y as
   the first source operand and x as the second; per the Power ISA the
   result takes its sign bit from the first source and all remaining bits
   from the second, i.e. the magnitude of x with the sign of y.  Defined
   only if no __copysign macro exists yet.  Each argument is evaluated
   exactly once and must not mention an identifier named __z, which would
   bind to the macro's own temporary.  */
# ifndef __copysign
# define __copysign(x, y) \
({ double __z; \
__asm __volatile ( \
" fcpsgn %0,%1,%2\n" \
: "=f" (__z) \
: "f" (y), "f" (x)); \
__z; })
# endif
/* __copysignf(x, y): float counterpart of __copysign.  Runs fcpsgn (Power
   ISA "Floating Copy Sign") with y as the first source operand and x as the
   second, so the result carries the sign of y and the remaining bits of x,
   then applies frsp ("Floating Round to Single-Precision") to that result
   in place, yielding a float.  Defined only if no __copysignf macro exists
   yet.  Each argument is evaluated exactly once and must not mention an
   identifier named __z, which would bind to the macro's own temporary.  */
# ifndef __copysignf
# define __copysignf(x, y) \
({ float __z; \
__asm __volatile ( \
" fcpsgn %0,%1,%2\n" \
" frsp %0,%0\n" \
: "=f" (__z) \
: "f" (y), "f" (x)); \
__z; })
# endif
#endif /* defined _ARCH_PWR6 */
#endif /* _PPC_MATH_PRIVATE_H_ */