PowerPC: Arithmetic function optimizations for POWER

2024-11-25 22:40:05 +00:00 · 2011-11-11 13:33:38 -05:00 · 2011-11-11 13:33:38 -05:00 · 8a6d525522
commit 8a6d525522
parent 1d8f7ddc04
6 changed files with 149 additions and 4 deletions
--- a/11
+++ b/11
@ -1,3 +1,14 @@
+2011-11-07  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
+
+	* sysdeps/powerpc/fpu/math_private.h: Use inline assembly versions
+	of the math functions ceil, trunc, floor, round, and sqrt when
+	available on the platform.
+	* sysdeps/powerpc/fpu/e_sqrt.c: Undefine __ieee754_sqrt to avoid
+	name clash.
+	* sysdeps/powerpc/fpu/e_sqrtf.c: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/e_sqrt.c: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c: Likewise.
+
 2011-10-30  Marek Polacek  <mpolacek@redhat.com>

 	* libio/wfileops.c (_IO_wfile_underflow_mmap): Remove unused variable.
--- a/sysdeps/powerpc/fpu/e_sqrt.c
+++ b/sysdeps/powerpc/fpu/e_sqrt.c
@ -154,6 +154,7 @@ __slow_ieee754_sqrt (double x)
  return f_wash (x);
 }

+#undef __ieee754_sqrt
 double
 __ieee754_sqrt (double x)
 {
--- a/sysdeps/powerpc/fpu/e_sqrtf.c
+++ b/sysdeps/powerpc/fpu/e_sqrtf.c
@ -130,7 +130,7 @@ __slow_ieee754_sqrtf (float x)
  return f_washf (x);
 }

-
+#undef __ieee754_sqrtf
 float
 __ieee754_sqrtf (float x)
 {
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@ -1,5 +1,5 @@
 /* Private inline math functions for powerpc.
-   Copyright (C) 2006
+   Copyright (C) 2006, 2011
   Free Software Foundation, Inc.
   This file is part of the GNU C Library.

@ -25,11 +25,144 @@
 #include <ldsodefs.h>
 #include <dl-procinfo.h>

+#include <math/math_private.h>
+
 # if __WORDSIZE == 64 || defined _ARCH_PWR4
 #  define __CPU_HAS_FSQRT 1
+
+#ifndef __ieee754_sqrt
+# define __ieee754_sqrt(x)		\
+  ({ double __z;			\
+     __asm __volatile (			\
+	"	fsqrt %0,%1\n"		\
+		: "=f" (__z)		\
+		: "f"(x));		\
+     __z; })
+#endif
+#ifndef __ieee754_sqrtf
+# define __ieee754_sqrtf(x)		\
+  ({ float __z;				\
+     __asm __volatile (			\
+	"	fsqrts %0,%1\n"		\
+		: "=f" (__z)		\
+		: "f"(x));		\
+     __z; })
+#endif
+
 # else
 #  define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
+# endif	/* __WORDSIZE == 64 || defined _ARCH_PWR4 */
+
+
+#if defined _ARCH_PWR5X
+
+# ifndef __round
+#  define __round(x)			\
+    ({ double __z;			\
+      __asm __volatile (		\
+	"	frin %0,%1\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
 # endif
+# ifndef __roundf
+#  define __roundf(x)			\
+    ({ float __z;			\
+     __asm __volatile (			\
+	"	frin %0,%1\n"		\
+	"	frsp %0,%0\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+
+# ifndef __trunc
+#  define __trunc(x)			\
+    ({ double __z;			\
+     __asm __volatile (			\
+	"	friz %0,%1\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+# ifndef __truncf
+#  define __truncf(x)			\
+    ({ float __z;			\
+     __asm __volatile (			\
+	"	friz %0,%1\n"		\
+	"	frsp %0,%0\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+
+# ifndef __ceil
+#  define __ceil(x)			\
+    ({ double __z;			\
+     __asm __volatile (			\
+	"	frip %0,%1\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+# ifndef __ceilf
+#  define __ceilf(x)			\
+    ({ float __z;			\
+     __asm __volatile (			\
+	"	frip %0,%1\n"		\
+	"	frsp %0,%0\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+
+# ifndef __floor
+#  define __floor(x)			\
+    ({ double __z;			\
+     __asm __volatile (			\
+	"	frim %0,%1\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+# ifndef __floorf
+#  define __floorf(x)			\
+    ({ float __z;			\
+     __asm __volatile (			\
+	"	frim %0,%1\n"		\
+	"	frsp %0,%0\n"		\
+		: "=f" (__z)		\
+		: "f" (x));		\
+     __z; })
+# endif
+
+#endif	/* defined _ARCH_PWR5X */
+
+
+#if defined _ARCH_PWR6
+
+# ifndef __copysign
+#  define __copysign(x, y)		\
+    ({ double __z;			\
+     __asm __volatile (			\
+	"	fcpsgn %0,%1,%2\n"	\
+		: "=f" (__z)		\
+		: "f" (y), "f" (x));	\
+     __z; })
+# endif
+# ifndef __copysignf
+#  define __copysignf(x, y)		\
+    ({ float __z;			\
+     __asm __volatile (			\
+	"	fcpsgn %0,%1,%2\n"	\
+	"	frsp %0,%0\n"		\
+		: "=f" (__z)		\
+		: "f" (y), "f" (x));	\
+     __z; })
+# endif
+
+#endif /* defined _ARCH_PWR6 */
+

 # ifndef __LIBC_INTERNAL_MATH_INLINES
 extern double __slow_ieee754_sqrt (double);
@ -78,6 +211,4 @@ __ieee754_sqrtf (float __x)
 }
 #endif /* __LIBC_INTERNAL_MATH_INLINES */

-#include <math/math_private.h>
-
 #endif /* _PPC_MATH_PRIVATE_H_ */
--- a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
+++ b/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
@ -20,6 +20,7 @@
 #include <math.h>
 #include <math_private.h>

+#undef __ieee754_sqrt
 double
 __ieee754_sqrt (double x)
 {
--- a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c
+++ b/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c
@ -20,6 +20,7 @@
 #include <math.h>
 #include <math_private.h>

+#undef __ieee754_sqrtf
 float
 __ieee754_sqrtf (float x)
 {