i386: Replace assembly versions of e_logf with generic e_logf.c

This patch replaces i386 assembly versions of e_logf with generic
e_logf.c.

For workload-spec2017.wrf, on Nehalem, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   73.3865   40.0454   83%
latency                 90.0985   54.4479   65%

On Skylake, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   75.1384   22.1452   239%
latency                 91.9441   50.7925   81%

On IvyBridge with --disable-multi-arch, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   84.5575   28.7879   193%
latency                 103.971   57.5231   80%

	* sysdeps/i386/fpu/e_logf.S: Removed.
	* sysdeps/i386/fpu/e_logf_data.c: Likewise.
	* sysdeps/i386/fpu/w_logf.c: Likewise.
	* sysdeps/i386/i686/fpu/e_logf.S: Likewise.
	* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_logf.c.
	* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add e_logf-sse2.
	(CFLAGS-e_logf-sse2.c): New.
	* sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c: New file.
	* sysdeps/i386/i686/fpu/multiarch/e_logf.c: Likewise.
2024-12-23 11:20:07 +00:00 · 2017-10-22 08:01:38 -07:00 · 2017-10-22 08:01:38 -07:00 · fe596486d6
commit fe596486d6
parent 7eda65f69e
10 changed files with 76 additions and 143 deletions
--- a/ChangeLog
+++ b/ChangeLog
@ -1,3 +1,17 @@
 2017-10-22  H.J. Lu  <hongjiu.lu@intel.com>
 	* sysdeps/i386/fpu/e_logf.S: Removed.
 	* sysdeps/i386/fpu/e_logf_data.c: Likewise.
 	* sysdeps/i386/fpu/w_logf.c: Likewise.
 	* sysdeps/i386/i686/fpu/e_logf.S: Likewise.
 	* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_logf.c.
 	* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
 	* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
 	Add e_logf-sse2.
 	(CFLAGS-e_logf-sse2.c): New.
 	* sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c: New file.
 	* sysdeps/i386/i686/fpu/multiarch/e_logf.c: Likewise.
 2017-10-22  H.J. Lu  <hongjiu.lu@intel.com>
 	* sysdeps/i386/fpu/e_exp2f.S: Removed.
--- a/sysdeps/i386/fpu/e_logf.S
+++ b/sysdeps/i386/fpu/e_logf.S
@ -1,93 +0,0 @@
 /*
 * Written by J.T. Conklin <jtc@netbsd.org>.
 * Public domain.
 * Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
 *
 * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
 */
 #include <machine/asm.h>
 	/* Read-only 8-byte constants referenced through MO() by the
 	   routines in this file.  */
 	.section .rodata.cst8,"aM",@progbits,8
 	.p2align 3
 	/* 1.0: subtracted from x to form x-1.  */
 	.type one,@object
 one:	.double 1.0
 	ASM_SIZE_DIRECTIVE(one)
 	/* It is not important that this constant is precise.  It is only
 	   a value which is known to be on the safe side for using the
 	   fyl2xp1 instruction.  */
 	/* |x-1| is compared against this value to choose between the
 	   fyl2xp1 path and the fyl2x path.  */
 	.type limit,@object
 limit:	.double 0.29
 	ASM_SIZE_DIRECTIVE(limit)
 /* MO(sym) builds the memory operand for one of the constants above:
    a @GOTOFF reference relative to %edx when compiled as PIC (the
    routines invoke LOAD_PIC_REG (dx) first -- presumably that sets up
    %edx; the macro is defined elsewhere), the bare symbol otherwise.  */
 #ifdef PIC
 # define MO(op) op##@GOTOFF(%edx)
 #else
 # define MO(op) op
 #endif
 	.text
 /* __ieee754_logf: natural logarithm of a float, computed on the x87 FPU.
    In:    x, loaded as a single-precision value from 4(%esp).
    Out:   result left in %st(0).
    Uses:  %ax and the CPU flags (fnstsw/sahf/andb/cmpb); under PIC also
           %edx as the base register for MO().
    Flow:  a NaN input is returned as-is (label 3).  Otherwise, when
           |x-1| is above `limit' the result is log(2)*log2(x) via fyl2x
           (label 2); else it is log(2)*log2((x-1)+1) via fyl2xp1.
    The stack pictures in the trailing comments list %st(0) first.  */
 ENTRY(__ieee754_logf)
 	fldln2			// log(2)
 	flds	4(%esp)		// x : log(2)
 	fxam			// classify x into C3/C2/C0
 	fnstsw			// FPU status word -> %ax
 	/* Set up the base register that MO() uses in PIC builds.  */
 #ifdef PIC
 	LOAD_PIC_REG (dx)
 #endif
 	fld	%st		// x : x : log(2)
 	sahf			// %ah -> flags: C0 -> CF, C2 -> PF, C3 -> ZF
 	jc	3f		// in case x is NaN or +-Inf
 4:	fsubl	MO(one)		// x-1 : x : log(2)
 	fld	%st		// x-1 : x-1 : x : log(2)
 	fabs			// |x-1| : x-1 : x : log(2)
 	fcompl	MO(limit)	// x-1 : x : log(2)
 	fnstsw			// x-1 : x : log(2)
 	andb	$0x45, %ah	// keep only C3, C2 and C0 of the compare
 	jz	2f		// all clear: |x-1| > limit, take the fyl2x path
 	fxam			// classify x-1
 	fnstsw
 	andb	$0x45, %ah
 	cmpb	$0x40, %ah	// C3 alone set: x-1 is a zero
 	jne	5f
 	fabs			// log(1) is +0 in all rounding modes.
 5:	fstp	%st(1)		// x-1 : log(2)
 	fyl2xp1			// log(x)
 	ret
 	/* |x-1| is outside the fyl2xp1 range: discard x-1 and use x itself.
 	   __logf_finite also branches here.  */
 2:	fstp	%st(0)		// x : log(2)
 	fyl2x			// log(x)
 	ret
 	/* Reached with CF set (NaN or Inf).  PF set means Inf, which goes
 	   back to the normal path; a NaN falls through, drops the extra
 	   stack entries and is returned in %st(0).  */
 3:	jp	4b		// in case x is +-Inf
 	fstp	%st(1)
 	fstp	%st(1)
 	ret
 END (__ieee754_logf)
 /* __logf_finite: same computation as __ieee754_logf but without the
    initial fxam test for NaN/Inf.
    In:    x, loaded as a single-precision value from 4(%esp).
    Out:   result left in %st(0).
    Uses:  %ax and the CPU flags; under PIC also %edx as the base for MO().
    Note:  `2b' below resolves to the nearest preceding `2:' label, which
           is the fyl2x tail inside __ieee754_logf, so this routine relies
           on that code being assembled directly before it.  */
 ENTRY(__logf_finite)
 	fldln2			// log(2)
 	flds	4(%esp)		// x : log(2)
 	/* Set up the base register that MO() uses in PIC builds.  */
 #ifdef PIC
 	LOAD_PIC_REG (dx)
 #endif
 	fld	%st		// x : x : log(2)
 	fsubl	MO(one)		// x-1 : x : log(2)
 	fld	%st		// x-1 : x-1 : x : log(2)
 	fabs			// |x-1| : x-1 : x : log(2)
 	fcompl	MO(limit)	// x-1 : x : log(2)
 	fnstsw			// x-1 : x : log(2)
 	andb	$0x45, %ah	// keep only C3, C2 and C0 of the compare
 	jz	2b		// all clear: |x-1| > limit, shared fyl2x path
 	fxam			// classify x-1
 	fnstsw
 	andb	$0x45, %ah
 	cmpb	$0x40, %ah	// C3 alone set: x-1 is a zero
 	jne	6f
 	fabs			// log(1) is +0 in all rounding modes.
 6:	fstp	%st(1)		// x-1 : log(2)
 	fyl2xp1			// log(x)
 	ret
 END(__logf_finite)
--- a/sysdeps/i386/fpu/e_logf_data.c
+++ b/sysdeps/i386/fpu/e_logf_data.c
@ -1 +0,0 @@
 /* Not needed.  */
--- a/sysdeps/i386/fpu/libm-test-ulps
+++ b/sysdeps/i386/fpu/libm-test-ulps
@ -2000,17 +2000,17 @@ ldouble: 4
 Function: "gamma_downward":
 double: 4
-float: 4
+float: 5
 idouble: 4
-ifloat: 4
+ifloat: 5
 ildouble: 7
 ldouble: 7
 Function: "gamma_towardzero":
 double: 4
-float: 2
+float: 3
 idouble: 4
-ifloat: 2
+ifloat: 3
 ildouble: 7
 ldouble: 7
@ -2186,20 +2186,20 @@ ldouble: 4
 Function: "lgamma_downward":
 double: 4
-float: 4
+float: 5
 float128: 8
 idouble: 4
-ifloat: 4
+ifloat: 5
 ifloat128: 8
 ildouble: 7
 ldouble: 7
 Function: "lgamma_towardzero":
 double: 4
-float: 2
+float: 3
 float128: 5
 idouble: 4
-ifloat: 2
+ifloat: 3
 ifloat128: 5
 ildouble: 7
 ldouble: 7
@ -2641,10 +2641,10 @@ ldouble: 5
 Function: "y0_towardzero":
 double: 2
-float: 2
+float: 3
 float128: 3
 idouble: 2
-ifloat: 2
+ifloat: 3
 ifloat128: 3
 ildouble: 5
 ldouble: 5
--- a/sysdeps/i386/fpu/w_logf.c
+++ b/sysdeps/i386/fpu/w_logf.c
@ -1 +0,0 @@
 #include <sysdeps/../math/w_logf.c>
--- a/sysdeps/i386/i686/fpu/e_logf.S
+++ b/sysdeps/i386/i686/fpu/e_logf.S
@ -1,30 +0,0 @@
 /*
 * Written by J.T. Conklin <jtc@netbsd.org>.
 * Public domain.
 * Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
 *
 * Adapted for i686 instructions.
 */
 #include <machine/asm.h>
 	.text
 /* __ieee754_logf (i686 variant): natural logarithm of a float.
    In:    x, loaded as a single-precision value from 4(%esp).
    Out:   result left in %st(0).
    Uses:  CPU flags (written by fucomi).
    A NaN input is detected by comparing x with itself and is returned
    as-is; every other input is computed as log(2)*log2(x) via fyl2x.  */
 ENTRY(__ieee754_logf)
 	fldln2			// log(2)
 	flds	4(%esp)		// x : log(2)
 	fucomi	%st		// compare x with itself; unordered sets PF
 	jp	3f		// unordered: x is NaN
 	fyl2x			// log(x)
 	ret
 3:	fstp	%st(1)		// drop log(2), leaving the NaN x in %st(0)
 	ret
 END (__ieee754_logf)
 /* __logf_finite (i686 variant): log(2)*log2(x) via fyl2x with no NaN
    pre-check.
    In:    x, loaded as a single-precision value from 4(%esp).
    Out:   result left in %st(0).  */
 ENTRY(__logf_finite)
 	fldln2			// log(2)
 	flds	4(%esp)		// x : log(2)
 	fyl2x			// log(x)
 	ret
 END(__logf_finite)
--- a/sysdeps/i386/i686/fpu/multiarch/Makefile
+++ b/sysdeps/i386/i686/fpu/multiarch/Makefile
@ -1,7 +1,8 @@
 ifeq ($(subdir),math)
-libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 s_sinf-sse2 s_cosf-sse2 \
+libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 \
-                        s_sincosf-sse2
+			s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
 CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
 CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
+CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse
 endif
--- a/sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c
+++ b/sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c
@ -0,0 +1,3 @@
 /* Rename __logf to __logf_sse2 before pulling in the generic flt-32
    source, so that (presumably) the generic implementation is emitted
    under the name __logf_sse2.  The multiarch Makefile compiles this
    file with -msse2 -mfpmath=sse.  */
 #define __logf __logf_sse2
 #include <sysdeps/ieee754/flt-32/e_logf.c>
--- a/sysdeps/i386/i686/fpu/multiarch/e_logf.c
+++ b/sysdeps/i386/i686/fpu/multiarch/e_logf.c
@ -0,0 +1,40 @@
 /* Multiple versions of logf.
   Copyright (C) 2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 /* Prototype under a private name, used as the first argument of
    libc_ifunc_redirected below.  */
 extern float __redirect_logf (float);
 /* SYMBOL_NAME is set before including ifunc-sse2.h; presumably the
    header uses it to name the per-variant functions and to define
    IFUNC_SELECTOR -- confirm against ifunc-sse2.h.  */
 #define SYMBOL_NAME logf
 #include "ifunc-sse2.h"
 /* Presumably defines __logf as an indirect function whose target is
    chosen by IFUNC_SELECTOR () -- macro defined outside this file.  */
 libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
 #ifdef SHARED
 /* Shared build: declare __logf_ia32 with hidden visibility and publish
    __logf as logf through versioned_symbol with GLIBC_2_27.  */
 __hidden_ver1 (__logf_ia32, __GI___logf, __redirect_logf)
  __attribute__ ((visibility ("hidden")));
 # include <shlib-compat.h>
 versioned_symbol (libm, __logf, logf, GLIBC_2_27);
 #else
 /* Non-shared build: logf is a weak alias of __logf.  */
 weak_alias (__logf, logf)
 #endif
 /* Alias the names __ieee754_logf and __logf_finite to __logf.  */
 strong_alias (__logf, __ieee754_logf)
 strong_alias (__logf, __logf_finite)
 /* Rename __logf to __logf_ia32 before pulling in the generic flt-32
    source, so that (presumably) the generic implementation is emitted
    here under the name __logf_ia32.  */
 #define __logf __logf_ia32
 #include <sysdeps/ieee754/flt-32/e_logf.c>
--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
+++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
@ -2000,9 +2000,9 @@ ldouble: 4
 Function: "gamma_downward":
 double: 4
-float: 4
+float: 5
 idouble: 4
-ifloat: 4
+ifloat: 5
 ildouble: 7
 ldouble: 7
@ -2186,10 +2186,10 @@ ldouble: 4
 Function: "lgamma_downward":
 double: 4
-float: 4
+float: 5
 float128: 8
 idouble: 4
-ifloat: 4
+ifloat: 5
 ifloat128: 8
 ildouble: 7
 ldouble: 7
@ -2625,10 +2625,10 @@ ldouble: 5
 Function: "y0_towardzero":
 double: 2
-float: 2
+float: 3
 float128: 3
 idouble: 2
-ifloat: 2
+ifloat: 3
 ifloat128: 3
 ildouble: 5
 ldouble: 5
		`@ -0,0 +1,3 @@`
							`#define __logf __logf_sse2`

							`#include <sysdeps/ieee754/flt-32/e_logf.c>`