mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-23 03:10:05 +00:00
i386: Replace assembly versions of e_logf with generic e_logf.c
This patch replaces i386 assembly versions of e_logf with generic e_logf.c.

For workload-spec2017.wrf, on Nehalem, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   73.3865   40.0454   83%
latency                 90.0985   54.4479   65%

On Skylake, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   75.1384   22.1452   239%
latency                 91.9441   50.7925   81%

On IvyBridge with --disable-multi-arch, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   84.5575   28.7879   193%
latency                 103.971   57.5231   80%

	* sysdeps/i386/fpu/e_logf.S: Removed.
	* sysdeps/i386/fpu/e_logf_data.c: Likewise.
	* sysdeps/i386/fpu/w_logf.c: Likewise.
	* sysdeps/i386/i686/fpu/e_logf.S: Likewise.
	* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_logf.c.
	* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add e_logf-sse2.
	(CFLAGS-e_logf-sse2.c): New.
	* sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c: New file.
	* sysdeps/i386/i686/fpu/multiarch/e_logf.c: Likewise.
This commit is contained in:
parent
7eda65f69e
commit
fe596486d6
14
ChangeLog
14
ChangeLog
@ -1,3 +1,17 @@
|
||||
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* sysdeps/i386/fpu/e_logf.S: Removed.
|
||||
* sysdeps/i386/fpu/e_logf_data.c: Likewise.
|
||||
* sysdeps/i386/fpu/w_logf.c: Likewise.
|
||||
* sysdeps/i386/i686/fpu/e_logf.S: Likewise.
|
||||
* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_logf.c.
|
||||
* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
|
||||
* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
|
||||
Add e_logf-sse2.
|
||||
(CFLAGS-e_logf-sse2.c): New.
|
||||
* sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c: New file.
|
||||
* sysdeps/i386/i686/fpu/multiarch/e_logf.c: Likewise.
|
||||
|
||||
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* sysdeps/i386/fpu/e_exp2f.S: Removed.
|
||||
|
@ -1,93 +0,0 @@
|
||||
/*
|
||||
* Written by J.T. Conklin <jtc@netbsd.org>.
|
||||
* Public domain.
|
||||
* Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
|
||||
*
|
||||
* Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
|
||||
*/
|
||||
|
||||
#include <machine/asm.h>
|
||||
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
|
||||
.p2align 3
|
||||
.type one,@object
|
||||
one: .double 1.0
|
||||
ASM_SIZE_DIRECTIVE(one)
|
||||
/* It is not important that this constant is precise. It is only
|
||||
a value which is known to be on the safe side for using the
|
||||
fyl2xp1 instruction. */
|
||||
.type limit,@object
|
||||
limit: .double 0.29
|
||||
ASM_SIZE_DIRECTIVE(limit)
|
||||
|
||||
|
||||
#ifdef PIC
|
||||
# define MO(op) op##@GOTOFF(%edx)
|
||||
#else
|
||||
# define MO(op) op
|
||||
#endif
|
||||
|
||||
.text
|
||||
ENTRY(__ieee754_logf)
|
||||
fldln2 // log(2)
|
||||
flds 4(%esp) // x : log(2)
|
||||
fxam
|
||||
fnstsw
|
||||
#ifdef PIC
|
||||
LOAD_PIC_REG (dx)
|
||||
#endif
|
||||
fld %st // x : x : log(2)
|
||||
sahf
|
||||
jc 3f // in case x is NaN or +-Inf
|
||||
4: fsubl MO(one) // x-1 : x : log(2)
|
||||
fld %st // x-1 : x-1 : x : log(2)
|
||||
fabs // |x-1| : x-1 : x : log(2)
|
||||
fcompl MO(limit) // x-1 : x : log(2)
|
||||
fnstsw // x-1 : x : log(2)
|
||||
andb $0x45, %ah
|
||||
jz 2f
|
||||
fxam
|
||||
fnstsw
|
||||
andb $0x45, %ah
|
||||
cmpb $0x40, %ah
|
||||
jne 5f
|
||||
fabs // log(1) is +0 in all rounding modes.
|
||||
5: fstp %st(1) // x-1 : log(2)
|
||||
fyl2xp1 // log(x)
|
||||
ret
|
||||
|
||||
2: fstp %st(0) // x : log(2)
|
||||
fyl2x // log(x)
|
||||
ret
|
||||
|
||||
3: jp 4b // in case x is +-Inf
|
||||
fstp %st(1)
|
||||
fstp %st(1)
|
||||
ret
|
||||
END (__ieee754_logf)
|
||||
|
||||
ENTRY(__logf_finite)
|
||||
fldln2 // log(2)
|
||||
flds 4(%esp) // x : log(2)
|
||||
#ifdef PIC
|
||||
LOAD_PIC_REG (dx)
|
||||
#endif
|
||||
fld %st // x : x : log(2)
|
||||
fsubl MO(one) // x-1 : x : log(2)
|
||||
fld %st // x-1 : x-1 : x : log(2)
|
||||
fabs // |x-1| : x-1 : x : log(2)
|
||||
fcompl MO(limit) // x-1 : x : log(2)
|
||||
fnstsw // x-1 : x : log(2)
|
||||
andb $0x45, %ah
|
||||
jz 2b
|
||||
fxam
|
||||
fnstsw
|
||||
andb $0x45, %ah
|
||||
cmpb $0x40, %ah
|
||||
jne 6f
|
||||
fabs // log(1) is +0 in all rounding modes.
|
||||
6: fstp %st(1) // x-1 : log(2)
|
||||
fyl2xp1 // log(x)
|
||||
ret
|
||||
END(__logf_finite)
|
@ -1 +0,0 @@
|
||||
/* Not needed. */
|
@ -2000,17 +2000,17 @@ ldouble: 4
|
||||
|
||||
Function: "gamma_downward":
|
||||
double: 4
|
||||
float: 4
|
||||
float: 5
|
||||
idouble: 4
|
||||
ifloat: 4
|
||||
ifloat: 5
|
||||
ildouble: 7
|
||||
ldouble: 7
|
||||
|
||||
Function: "gamma_towardzero":
|
||||
double: 4
|
||||
float: 2
|
||||
float: 3
|
||||
idouble: 4
|
||||
ifloat: 2
|
||||
ifloat: 3
|
||||
ildouble: 7
|
||||
ldouble: 7
|
||||
|
||||
@ -2186,20 +2186,20 @@ ldouble: 4
|
||||
|
||||
Function: "lgamma_downward":
|
||||
double: 4
|
||||
float: 4
|
||||
float: 5
|
||||
float128: 8
|
||||
idouble: 4
|
||||
ifloat: 4
|
||||
ifloat: 5
|
||||
ifloat128: 8
|
||||
ildouble: 7
|
||||
ldouble: 7
|
||||
|
||||
Function: "lgamma_towardzero":
|
||||
double: 4
|
||||
float: 2
|
||||
float: 3
|
||||
float128: 5
|
||||
idouble: 4
|
||||
ifloat: 2
|
||||
ifloat: 3
|
||||
ifloat128: 5
|
||||
ildouble: 7
|
||||
ldouble: 7
|
||||
@ -2641,10 +2641,10 @@ ldouble: 5
|
||||
|
||||
Function: "y0_towardzero":
|
||||
double: 2
|
||||
float: 2
|
||||
float: 3
|
||||
float128: 3
|
||||
idouble: 2
|
||||
ifloat: 2
|
||||
ifloat: 3
|
||||
ifloat128: 3
|
||||
ildouble: 5
|
||||
ldouble: 5
|
||||
|
@ -1 +0,0 @@
|
||||
#include <sysdeps/../math/w_logf.c>
|
@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Written by J.T. Conklin <jtc@netbsd.org>.
|
||||
* Public domain.
|
||||
* Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
|
||||
*
|
||||
* Adapted for i686 instructions.
|
||||
*/
|
||||
|
||||
#include <machine/asm.h>
|
||||
|
||||
|
||||
.text
|
||||
ENTRY(__ieee754_logf)
|
||||
fldln2 // log(2)
|
||||
flds 4(%esp) // x : log(2)
|
||||
fucomi %st
|
||||
jp 3f
|
||||
fyl2x // log(x)
|
||||
ret
|
||||
|
||||
3: fstp %st(1)
|
||||
ret
|
||||
END (__ieee754_logf)
|
||||
|
||||
ENTRY(__logf_finite)
|
||||
fldln2 // log(2)
|
||||
flds 4(%esp) // x : log(2)
|
||||
fyl2x // log(x)
|
||||
ret
|
||||
END(__logf_finite)
|
@ -1,7 +1,8 @@
|
||||
ifeq ($(subdir),math)
|
||||
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 s_sinf-sse2 s_cosf-sse2 \
|
||||
s_sincosf-sse2
|
||||
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 \
|
||||
s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
|
||||
|
||||
CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
|
||||
CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
|
||||
CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse
|
||||
endif
|
||||
|
3
sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c
Normal file
3
sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c
Normal file
@ -0,0 +1,3 @@
|
||||
#define __logf __logf_sse2
|
||||
|
||||
#include <sysdeps/ieee754/flt-32/e_logf.c>
|
40
sysdeps/i386/i686/fpu/multiarch/e_logf.c
Normal file
40
sysdeps/i386/i686/fpu/multiarch/e_logf.c
Normal file
@ -0,0 +1,40 @@
|
||||
/* Multiple versions of logf.
|
||||
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
extern float __redirect_logf (float);
|
||||
|
||||
#define SYMBOL_NAME logf
|
||||
#include "ifunc-sse2.h"
|
||||
|
||||
libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
|
||||
|
||||
#ifdef SHARED
|
||||
__hidden_ver1 (__logf_ia32, __GI___logf, __redirect_logf)
|
||||
__attribute__ ((visibility ("hidden")));
|
||||
|
||||
# include <shlib-compat.h>
|
||||
versioned_symbol (libm, __logf, logf, GLIBC_2_27);
|
||||
#else
|
||||
weak_alias (__logf, logf)
|
||||
#endif
|
||||
|
||||
strong_alias (__logf, __ieee754_logf)
|
||||
strong_alias (__logf, __logf_finite)
|
||||
|
||||
#define __logf __logf_ia32
|
||||
#include <sysdeps/ieee754/flt-32/e_logf.c>
|
@ -2000,9 +2000,9 @@ ldouble: 4
|
||||
|
||||
Function: "gamma_downward":
|
||||
double: 4
|
||||
float: 4
|
||||
float: 5
|
||||
idouble: 4
|
||||
ifloat: 4
|
||||
ifloat: 5
|
||||
ildouble: 7
|
||||
ldouble: 7
|
||||
|
||||
@ -2186,10 +2186,10 @@ ldouble: 4
|
||||
|
||||
Function: "lgamma_downward":
|
||||
double: 4
|
||||
float: 4
|
||||
float: 5
|
||||
float128: 8
|
||||
idouble: 4
|
||||
ifloat: 4
|
||||
ifloat: 5
|
||||
ifloat128: 8
|
||||
ildouble: 7
|
||||
ldouble: 7
|
||||
@ -2625,10 +2625,10 @@ ldouble: 5
|
||||
|
||||
Function: "y0_towardzero":
|
||||
double: 2
|
||||
float: 2
|
||||
float: 3
|
||||
float128: 3
|
||||
idouble: 2
|
||||
ifloat: 2
|
||||
ifloat: 3
|
||||
ifloat128: 3
|
||||
ildouble: 5
|
||||
ldouble: 5
|
||||
|
Loading…
Reference in New Issue
Block a user