mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-23 11:20:07 +00:00
i386: Replace assembly versions of e_logf with generic e_logf.c
This patch replaces i386 assembly versions of e_logf with generic e_logf.c.

For workload-spec2017.wrf, on Nehalem, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   73.3865   40.0454   83%
latency                 90.0985   54.4479   65%

On Skylake, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   75.1384   22.1452   239%
latency                 91.9441   50.7925   81%

On IvyBridge with --disable-multi-arch, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   84.5575   28.7879   193%
latency                 103.971   57.5231   80%

	* sysdeps/i386/fpu/e_logf.S: Removed.
	* sysdeps/i386/fpu/e_logf_data.c: Likewise.
	* sysdeps/i386/fpu/w_logf.c: Likewise.
	* sysdeps/i386/i686/fpu/e_logf.S: Likewise.
	* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_logf.c.
	* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add e_logf-sse2.
	(CFLAGS-e_logf-sse2.c): New.
	* sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c: New file.
	* sysdeps/i386/i686/fpu/multiarch/e_logf.c: Likewise.
This commit is contained in:
parent
7eda65f69e
commit
fe596486d6
14
ChangeLog
14
ChangeLog
@ -1,3 +1,17 @@
|
|||||||
|
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
|
* sysdeps/i386/fpu/e_logf.S: Removed.
|
||||||
|
* sysdeps/i386/fpu/e_logf_data.c: Likewise.
|
||||||
|
* sysdeps/i386/fpu/w_logf.c: Likewise.
|
||||||
|
* sysdeps/i386/i686/fpu/e_logf.S: Likewise.
|
||||||
|
* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_logf.c.
|
||||||
|
* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
|
||||||
|
* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
|
||||||
|
Add e_logf-sse2.
|
||||||
|
(CFLAGS-e_logf-sse2.c): New.
|
||||||
|
* sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c: New file.
|
||||||
|
* sysdeps/i386/i686/fpu/multiarch/e_logf.c: Likewise.
|
||||||
|
|
||||||
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
|
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
* sysdeps/i386/fpu/e_exp2f.S: Removed.
|
* sysdeps/i386/fpu/e_exp2f.S: Removed.
|
||||||
|
@ -1,93 +0,0 @@
|
|||||||
/*
|
|
||||||
* Written by J.T. Conklin <jtc@netbsd.org>.
|
|
||||||
* Public domain.
|
|
||||||
* Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
|
|
||||||
*
|
|
||||||
* Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <machine/asm.h>
|
|
||||||
|
|
||||||
.section .rodata.cst8,"aM",@progbits,8
|
|
||||||
|
|
||||||
.p2align 3
|
|
||||||
.type one,@object
|
|
||||||
one: .double 1.0
|
|
||||||
ASM_SIZE_DIRECTIVE(one)
|
|
||||||
/* It is not important that this constant is precise. It is only
|
|
||||||
a value which is known to be on the safe side for using the
|
|
||||||
fyl2xp1 instruction. */
|
|
||||||
.type limit,@object
|
|
||||||
limit: .double 0.29
|
|
||||||
ASM_SIZE_DIRECTIVE(limit)
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef PIC
|
|
||||||
# define MO(op) op##@GOTOFF(%edx)
|
|
||||||
#else
|
|
||||||
# define MO(op) op
|
|
||||||
#endif
|
|
||||||
|
|
||||||
.text
|
|
||||||
ENTRY(__ieee754_logf)
|
|
||||||
fldln2 // log(2)
|
|
||||||
flds 4(%esp) // x : log(2)
|
|
||||||
fxam
|
|
||||||
fnstsw
|
|
||||||
#ifdef PIC
|
|
||||||
LOAD_PIC_REG (dx)
|
|
||||||
#endif
|
|
||||||
fld %st // x : x : log(2)
|
|
||||||
sahf
|
|
||||||
jc 3f // in case x is NaN or +-Inf
|
|
||||||
4: fsubl MO(one) // x-1 : x : log(2)
|
|
||||||
fld %st // x-1 : x-1 : x : log(2)
|
|
||||||
fabs // |x-1| : x-1 : x : log(2)
|
|
||||||
fcompl MO(limit) // x-1 : x : log(2)
|
|
||||||
fnstsw // x-1 : x : log(2)
|
|
||||||
andb $0x45, %ah
|
|
||||||
jz 2f
|
|
||||||
fxam
|
|
||||||
fnstsw
|
|
||||||
andb $0x45, %ah
|
|
||||||
cmpb $0x40, %ah
|
|
||||||
jne 5f
|
|
||||||
fabs // log(1) is +0 in all rounding modes.
|
|
||||||
5: fstp %st(1) // x-1 : log(2)
|
|
||||||
fyl2xp1 // log(x)
|
|
||||||
ret
|
|
||||||
|
|
||||||
2: fstp %st(0) // x : log(2)
|
|
||||||
fyl2x // log(x)
|
|
||||||
ret
|
|
||||||
|
|
||||||
3: jp 4b // in case x is +-Inf
|
|
||||||
fstp %st(1)
|
|
||||||
fstp %st(1)
|
|
||||||
ret
|
|
||||||
END (__ieee754_logf)
|
|
||||||
|
|
||||||
ENTRY(__logf_finite)
|
|
||||||
fldln2 // log(2)
|
|
||||||
flds 4(%esp) // x : log(2)
|
|
||||||
#ifdef PIC
|
|
||||||
LOAD_PIC_REG (dx)
|
|
||||||
#endif
|
|
||||||
fld %st // x : x : log(2)
|
|
||||||
fsubl MO(one) // x-1 : x : log(2)
|
|
||||||
fld %st // x-1 : x-1 : x : log(2)
|
|
||||||
fabs // |x-1| : x-1 : x : log(2)
|
|
||||||
fcompl MO(limit) // x-1 : x : log(2)
|
|
||||||
fnstsw // x-1 : x : log(2)
|
|
||||||
andb $0x45, %ah
|
|
||||||
jz 2b
|
|
||||||
fxam
|
|
||||||
fnstsw
|
|
||||||
andb $0x45, %ah
|
|
||||||
cmpb $0x40, %ah
|
|
||||||
jne 6f
|
|
||||||
fabs // log(1) is +0 in all rounding modes.
|
|
||||||
6: fstp %st(1) // x-1 : log(2)
|
|
||||||
fyl2xp1 // log(x)
|
|
||||||
ret
|
|
||||||
END(__logf_finite)
|
|
@ -1 +0,0 @@
|
|||||||
/* Not needed. */
|
|
@ -2000,17 +2000,17 @@ ldouble: 4
|
|||||||
|
|
||||||
Function: "gamma_downward":
|
Function: "gamma_downward":
|
||||||
double: 4
|
double: 4
|
||||||
float: 4
|
float: 5
|
||||||
idouble: 4
|
idouble: 4
|
||||||
ifloat: 4
|
ifloat: 5
|
||||||
ildouble: 7
|
ildouble: 7
|
||||||
ldouble: 7
|
ldouble: 7
|
||||||
|
|
||||||
Function: "gamma_towardzero":
|
Function: "gamma_towardzero":
|
||||||
double: 4
|
double: 4
|
||||||
float: 2
|
float: 3
|
||||||
idouble: 4
|
idouble: 4
|
||||||
ifloat: 2
|
ifloat: 3
|
||||||
ildouble: 7
|
ildouble: 7
|
||||||
ldouble: 7
|
ldouble: 7
|
||||||
|
|
||||||
@ -2186,20 +2186,20 @@ ldouble: 4
|
|||||||
|
|
||||||
Function: "lgamma_downward":
|
Function: "lgamma_downward":
|
||||||
double: 4
|
double: 4
|
||||||
float: 4
|
float: 5
|
||||||
float128: 8
|
float128: 8
|
||||||
idouble: 4
|
idouble: 4
|
||||||
ifloat: 4
|
ifloat: 5
|
||||||
ifloat128: 8
|
ifloat128: 8
|
||||||
ildouble: 7
|
ildouble: 7
|
||||||
ldouble: 7
|
ldouble: 7
|
||||||
|
|
||||||
Function: "lgamma_towardzero":
|
Function: "lgamma_towardzero":
|
||||||
double: 4
|
double: 4
|
||||||
float: 2
|
float: 3
|
||||||
float128: 5
|
float128: 5
|
||||||
idouble: 4
|
idouble: 4
|
||||||
ifloat: 2
|
ifloat: 3
|
||||||
ifloat128: 5
|
ifloat128: 5
|
||||||
ildouble: 7
|
ildouble: 7
|
||||||
ldouble: 7
|
ldouble: 7
|
||||||
@ -2641,10 +2641,10 @@ ldouble: 5
|
|||||||
|
|
||||||
Function: "y0_towardzero":
|
Function: "y0_towardzero":
|
||||||
double: 2
|
double: 2
|
||||||
float: 2
|
float: 3
|
||||||
float128: 3
|
float128: 3
|
||||||
idouble: 2
|
idouble: 2
|
||||||
ifloat: 2
|
ifloat: 3
|
||||||
ifloat128: 3
|
ifloat128: 3
|
||||||
ildouble: 5
|
ildouble: 5
|
||||||
ldouble: 5
|
ldouble: 5
|
||||||
|
@ -1 +0,0 @@
|
|||||||
#include <sysdeps/../math/w_logf.c>
|
|
@ -1,30 +0,0 @@
|
|||||||
/*
|
|
||||||
* Written by J.T. Conklin <jtc@netbsd.org>.
|
|
||||||
* Public domain.
|
|
||||||
* Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
|
|
||||||
*
|
|
||||||
* Adapted for i686 instructions.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <machine/asm.h>
|
|
||||||
|
|
||||||
|
|
||||||
.text
|
|
||||||
ENTRY(__ieee754_logf)
|
|
||||||
fldln2 // log(2)
|
|
||||||
flds 4(%esp) // x : log(2)
|
|
||||||
fucomi %st
|
|
||||||
jp 3f
|
|
||||||
fyl2x // log(x)
|
|
||||||
ret
|
|
||||||
|
|
||||||
3: fstp %st(1)
|
|
||||||
ret
|
|
||||||
END (__ieee754_logf)
|
|
||||||
|
|
||||||
ENTRY(__logf_finite)
|
|
||||||
fldln2 // log(2)
|
|
||||||
flds 4(%esp) // x : log(2)
|
|
||||||
fyl2x // log(x)
|
|
||||||
ret
|
|
||||||
END(__logf_finite)
|
|
@ -1,7 +1,8 @@
|
|||||||
ifeq ($(subdir),math)
|
ifeq ($(subdir),math)
|
||||||
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 s_sinf-sse2 s_cosf-sse2 \
|
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 \
|
||||||
s_sincosf-sse2
|
s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
|
||||||
|
|
||||||
CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
|
CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
|
||||||
CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
|
CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
|
||||||
|
CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse
|
||||||
endif
|
endif
|
||||||
|
3
sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c
Normal file
3
sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
#define __logf __logf_sse2
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/flt-32/e_logf.c>
|
40
sysdeps/i386/i686/fpu/multiarch/e_logf.c
Normal file
40
sysdeps/i386/i686/fpu/multiarch/e_logf.c
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
/* Multiple versions of logf.
|
||||||
|
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
extern float __redirect_logf (float);
|
||||||
|
|
||||||
|
#define SYMBOL_NAME logf
|
||||||
|
#include "ifunc-sse2.h"
|
||||||
|
|
||||||
|
libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
|
||||||
|
|
||||||
|
#ifdef SHARED
|
||||||
|
__hidden_ver1 (__logf_ia32, __GI___logf, __redirect_logf)
|
||||||
|
__attribute__ ((visibility ("hidden")));
|
||||||
|
|
||||||
|
# include <shlib-compat.h>
|
||||||
|
versioned_symbol (libm, __logf, logf, GLIBC_2_27);
|
||||||
|
#else
|
||||||
|
weak_alias (__logf, logf)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
strong_alias (__logf, __ieee754_logf)
|
||||||
|
strong_alias (__logf, __logf_finite)
|
||||||
|
|
||||||
|
#define __logf __logf_ia32
|
||||||
|
#include <sysdeps/ieee754/flt-32/e_logf.c>
|
@ -2000,9 +2000,9 @@ ldouble: 4
|
|||||||
|
|
||||||
Function: "gamma_downward":
|
Function: "gamma_downward":
|
||||||
double: 4
|
double: 4
|
||||||
float: 4
|
float: 5
|
||||||
idouble: 4
|
idouble: 4
|
||||||
ifloat: 4
|
ifloat: 5
|
||||||
ildouble: 7
|
ildouble: 7
|
||||||
ldouble: 7
|
ldouble: 7
|
||||||
|
|
||||||
@ -2186,10 +2186,10 @@ ldouble: 4
|
|||||||
|
|
||||||
Function: "lgamma_downward":
|
Function: "lgamma_downward":
|
||||||
double: 4
|
double: 4
|
||||||
float: 4
|
float: 5
|
||||||
float128: 8
|
float128: 8
|
||||||
idouble: 4
|
idouble: 4
|
||||||
ifloat: 4
|
ifloat: 5
|
||||||
ifloat128: 8
|
ifloat128: 8
|
||||||
ildouble: 7
|
ildouble: 7
|
||||||
ldouble: 7
|
ldouble: 7
|
||||||
@ -2625,10 +2625,10 @@ ldouble: 5
|
|||||||
|
|
||||||
Function: "y0_towardzero":
|
Function: "y0_towardzero":
|
||||||
double: 2
|
double: 2
|
||||||
float: 2
|
float: 3
|
||||||
float128: 3
|
float128: 3
|
||||||
idouble: 2
|
idouble: 2
|
||||||
ifloat: 2
|
ifloat: 3
|
||||||
ifloat128: 3
|
ifloat128: 3
|
||||||
ildouble: 5
|
ildouble: 5
|
||||||
ldouble: 5
|
ldouble: 5
|
||||||
|
Loading…
Reference in New Issue
Block a user