i386: Replace assembly versions of e_logf with generic e_logf.c

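This patch replaces the i386 assembly versions of e_logf with the generic
e_logf.c.  In the tables below lower Before/After values are better and
Improvement is Before / After - 1.  For workload-spec2017.wrf, on Nehalem,
it improves performance by: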

                           Before            After     Improvement
reciprocal-throughput      73.3865          40.0454       83%
latency                    90.0985          54.4479       65%

On Skylake, it improves performance by:

                           Before            After     Improvement
reciprocal-throughput      75.1384          22.1452       239%
latency                    91.9441          50.7925       81%

On IvyBridge with --disable-multi-arch, it improves performance by:

                           Before            After     Improvement
reciprocal-throughput      84.5575          28.7879       193%
latency                    103.971          57.5231       80%

	* sysdeps/i386/fpu/e_logf.S: Removed.
	* sysdeps/i386/fpu/e_logf_data.c: Likewise.
	* sysdeps/i386/fpu/w_logf.c: Likewise.
	* sysdeps/i386/i686/fpu/e_logf.S: Likewise.
	* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_logf.c.
	* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add e_logf-sse2.
	(CFLAGS-e_logf-sse2.c): New.
	* sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c: New file.
	* sysdeps/i386/i686/fpu/multiarch/e_logf.c: Likewise.
This commit is contained in:
H.J. Lu 2017-10-22 08:01:38 -07:00
parent 7eda65f69e
commit fe596486d6
10 changed files with 76 additions and 143 deletions

View File

@ -1,3 +1,17 @@
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/i386/fpu/e_logf.S: Removed.
* sysdeps/i386/fpu/e_logf_data.c: Likewise.
* sysdeps/i386/fpu/w_logf.c: Likewise.
* sysdeps/i386/i686/fpu/e_logf.S: Likewise.
* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_logf.c.
* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
Add e_logf-sse2.
(CFLAGS-e_logf-sse2.c): New.
* sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c: New file.
* sysdeps/i386/i686/fpu/multiarch/e_logf.c: Likewise.
2017-10-22 H.J. Lu <hongjiu.lu@intel.com> 2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/i386/fpu/e_exp2f.S: Removed. * sysdeps/i386/fpu/e_exp2f.S: Removed.

View File

@ -1,93 +0,0 @@
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
* Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
*
* Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
*/
#include <machine/asm.h>
/* Read-only 8-byte constants used by the functions below, aligned to
8 bytes by .p2align 3. */
.section .rodata.cst8,"aM",@progbits,8
.p2align 3
/* 1.0: subtracted from x to form x-1. */
.type one,@object
one: .double 1.0
ASM_SIZE_DIRECTIVE(one)
/* It is not important that this constant is precise. It is only
a value which is known to be on the safe side for using the
fyl2xp1 instruction. */
.type limit,@object
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
/* MO(op) forms the memory operand for one of the constants above.
In PIC builds the constant is addressed as op@GOTOFF relative to %edx,
so %edx must have been set up (the functions below use
LOAD_PIC_REG (dx) for that) before MO is used. Otherwise the symbol
is referenced directly. */
#ifdef PIC
# define MO(op) op##@GOTOFF(%edx)
#else
# define MO(op) op
#endif
.text
/* __ieee754_logf: natural logarithm of the single-precision argument
loaded from 4(%esp); the result is left in %st(0).
The stack comments list the x87 register stack with %st(0) first.
- NaN is returned unchanged (label 3).
- If |x-1| is above limit, log(x) is computed with fyl2x (label 2).
- Otherwise fyl2xp1 is applied to x-1, after turning a zero x-1 into +0
so that log(1) is +0. */
ENTRY(__ieee754_logf)
fldln2 // log(2)
flds 4(%esp) // x : log(2)
fxam // classify x into the C3/C2/C0 condition bits
fnstsw // %ax = x87 status word
#ifdef PIC
LOAD_PIC_REG (dx)
#endif
fld %st // x : x : log(2)
sahf // copy %ah into the flags: CF = C0, PF = C2
jc 3f // in case x is NaN or +-Inf
4: fsubl MO(one) // x-1 : x : log(2)
fld %st // x-1 : x-1 : x : log(2)
fabs // |x-1| : x-1 : x : log(2)
fcompl MO(limit) // x-1 : x : log(2)
fnstsw // x-1 : x : log(2)
andb $0x45, %ah // keep only C3, C2 and C0 of the comparison
jz 2f // all clear: |x-1| > limit, use fyl2x on x
fxam // classify x-1
fnstsw
andb $0x45, %ah
cmpb $0x40, %ah // only C3 set: x-1 is a zero
jne 5f
fabs // log(1) is +0 in all rounding modes.
5: fstp %st(1) // x-1 : log(2)
fyl2xp1 // log(x)
ret
2: fstp %st(0) // x : log(2)
fyl2x // log(x)
ret
3: jp 4b // in case x is +-Inf
// x is NaN: pop the duplicate of x and log(2), leaving x in %st(0).
fstp %st(1)
fstp %st(1)
ret
END (__ieee754_logf)
/* __logf_finite: same computation as __ieee754_logf above, but without
the initial fxam test for NaN or +-Inf. The fyl2x case is not
duplicated here: it branches back (jz 2b) to label 2 inside
__ieee754_logf, so the two functions must stay adjacent in this file. */
ENTRY(__logf_finite)
fldln2 // log(2)
flds 4(%esp) // x : log(2)
#ifdef PIC
LOAD_PIC_REG (dx)
#endif
fld %st // x : x : log(2)
fsubl MO(one) // x-1 : x : log(2)
fld %st // x-1 : x-1 : x : log(2)
fabs // |x-1| : x-1 : x : log(2)
fcompl MO(limit) // x-1 : x : log(2)
fnstsw // x-1 : x : log(2)
andb $0x45, %ah // keep only C3, C2 and C0 of the comparison
jz 2b // all clear: |x-1| > limit, use the shared fyl2x tail
fxam // classify x-1
fnstsw
andb $0x45, %ah
cmpb $0x40, %ah // only C3 set: x-1 is a zero
jne 6f
fabs // log(1) is +0 in all rounding modes.
6: fstp %st(1) // x-1 : log(2)
fyl2xp1 // log(x)
ret
END(__logf_finite)

View File

@ -1 +0,0 @@
/* Not needed. */

View File

@ -2000,17 +2000,17 @@ ldouble: 4
Function: "gamma_downward": Function: "gamma_downward":
double: 4 double: 4
float: 4 float: 5
idouble: 4 idouble: 4
ifloat: 4 ifloat: 5
ildouble: 7 ildouble: 7
ldouble: 7 ldouble: 7
Function: "gamma_towardzero": Function: "gamma_towardzero":
double: 4 double: 4
float: 2 float: 3
idouble: 4 idouble: 4
ifloat: 2 ifloat: 3
ildouble: 7 ildouble: 7
ldouble: 7 ldouble: 7
@ -2186,20 +2186,20 @@ ldouble: 4
Function: "lgamma_downward": Function: "lgamma_downward":
double: 4 double: 4
float: 4 float: 5
float128: 8 float128: 8
idouble: 4 idouble: 4
ifloat: 4 ifloat: 5
ifloat128: 8 ifloat128: 8
ildouble: 7 ildouble: 7
ldouble: 7 ldouble: 7
Function: "lgamma_towardzero": Function: "lgamma_towardzero":
double: 4 double: 4
float: 2 float: 3
float128: 5 float128: 5
idouble: 4 idouble: 4
ifloat: 2 ifloat: 3
ifloat128: 5 ifloat128: 5
ildouble: 7 ildouble: 7
ldouble: 7 ldouble: 7
@ -2641,10 +2641,10 @@ ldouble: 5
Function: "y0_towardzero": Function: "y0_towardzero":
double: 2 double: 2
float: 2 float: 3
float128: 3 float128: 3
idouble: 2 idouble: 2
ifloat: 2 ifloat: 3
ifloat128: 3 ifloat128: 3
ildouble: 5 ildouble: 5
ldouble: 5 ldouble: 5

View File

@ -1 +0,0 @@
#include <sysdeps/../math/w_logf.c>

View File

@ -1,30 +0,0 @@
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
* Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
*
* Adapted for i686 instructions.
*/
#include <machine/asm.h>
.text
/* __ieee754_logf (i686 variant): natural logarithm of the
single-precision argument loaded from 4(%esp), computed with a single
fyl2x; the result is left in %st(0). A NaN argument is detected by
comparing x with itself and is returned unchanged.
The stack comments list the x87 register stack with %st(0) first. */
ENTRY(__ieee754_logf)
fldln2 // log(2)
flds 4(%esp) // x : log(2)
fucomi %st // compare x with itself; PF is set if unordered (NaN)
jp 3f
fyl2x // log(x)
ret
3: fstp %st(1) // NaN: drop log(2) and return x itself
ret
END (__ieee754_logf)
/* __logf_finite (i686 variant): same as __ieee754_logf above but with
no NaN check; the argument goes straight to fyl2x. */
ENTRY(__logf_finite)
fldln2 // log(2)
flds 4(%esp) // x : log(2)
fyl2x // log(x)
ret
END(__logf_finite)

View File

@ -1,7 +1,8 @@
ifeq ($(subdir),math) ifeq ($(subdir),math)
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 s_sinf-sse2 s_cosf-sse2 \ libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 \
s_sincosf-sse2 s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse
endif endif

View File

@ -0,0 +1,3 @@
/* Build the generic flt-32 logf under the name __logf_sse2.  The rename
   must precede the #include so that it applies to the definition in the
   included file.  */
#define __logf __logf_sse2
#include <sysdeps/ieee754/flt-32/e_logf.c>

View File

@ -0,0 +1,40 @@
/* Multiple versions of logf.
Copyright (C) 2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Prototype handed to libc_ifunc_redirected below together with
   __logf.  */
extern float __redirect_logf (float);
/* SYMBOL_NAME is defined before including ifunc-sse2.h, which provides
   IFUNC_SELECTOR -- presumably it derives the variant names from it;
   confirm against that header.  */
#define SYMBOL_NAME logf
#include "ifunc-sse2.h"
/* Define __logf as an indirect function whose implementation is chosen
   by IFUNC_SELECTOR ().  */
libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
#ifdef SHARED
/* Shared build: declare __logf_ia32 under the internal name __GI___logf
   with hidden visibility, and export logf as a versioned symbol at
   GLIBC_2_27.  */
__hidden_ver1 (__logf_ia32, __GI___logf, __redirect_logf)
__attribute__ ((visibility ("hidden")));
# include <shlib-compat.h>
versioned_symbol (libm, __logf, logf, GLIBC_2_27);
#else
/* Static build: logf is a plain weak alias of __logf.  */
weak_alias (__logf, logf)
#endif
/* The names formerly defined by the assembly implementation now alias
   __logf.  */
strong_alias (__logf, __ieee754_logf)
strong_alias (__logf, __logf_finite)
/* Build the generic flt-32 logf in this file under the name __logf_ia32.
   The rename must come after the uses of __logf above and before the
   #include.  */
#define __logf __logf_ia32
#include <sysdeps/ieee754/flt-32/e_logf.c>

View File

@ -2000,9 +2000,9 @@ ldouble: 4
Function: "gamma_downward": Function: "gamma_downward":
double: 4 double: 4
float: 4 float: 5
idouble: 4 idouble: 4
ifloat: 4 ifloat: 5
ildouble: 7 ildouble: 7
ldouble: 7 ldouble: 7
@ -2186,10 +2186,10 @@ ldouble: 4
Function: "lgamma_downward": Function: "lgamma_downward":
double: 4 double: 4
float: 4 float: 5
float128: 8 float128: 8
idouble: 4 idouble: 4
ifloat: 4 ifloat: 5
ifloat128: 8 ifloat128: 8
ildouble: 7 ildouble: 7
ldouble: 7 ldouble: 7
@ -2625,10 +2625,10 @@ ldouble: 5
Function: "y0_towardzero": Function: "y0_towardzero":
double: 2 double: 2
float: 2 float: 3
float128: 3 float128: 3
idouble: 2 idouble: 2
ifloat: 2 ifloat: 3
ifloat128: 3 ifloat128: 3
ildouble: 5 ildouble: 5
ldouble: 5 ldouble: 5