mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-08 22:30:07 +00:00
i386: Replace assembly versions of e_log2f with generic e_log2f.c
This patch replaces i386 assembly versions of e_log2f with generic e_log2f.c.

For workload-spec2017.wrf, on Nehalem, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   92.3845   30.8752   199%
latency                 112.855   54.8645   105%

On Skylake, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   98.7488   22.7507   334%
latency                 118.01    51.6083   128%

On IvyBridge with --disable-multi-arch, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   106.635   28.8596   269%
latency                 129.888   56.9187   128%

	* sysdeps/i386/fpu/e_log2f.S: Removed.
	* sysdeps/i386/fpu/e_log2f_data.c: Likewise.
	* sysdeps/i386/fpu/w_log2f.c: Likewise.
	* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_log2f.c.
	* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add e_log2f-sse2.
	(CFLAGS-e_log2f-sse2.c): New.
	* sysdeps/i386/i686/fpu/multiarch/e_log2f-sse2.c: New file.
	* sysdeps/i386/i686/fpu/multiarch/e_log2f.c: Likewise.
This commit is contained in:
parent
80bb593563
commit
6089a3ee24
13
ChangeLog
13
ChangeLog
@ -1,3 +1,16 @@
|
||||
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* sysdeps/i386/fpu/e_log2f.S: Removed.
|
||||
* sysdeps/i386/fpu/e_log2f_data.c: Likewise.
|
||||
* sysdeps/i386/fpu/w_log2f.c: Likewise.
|
||||
* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_log2f.c.
|
||||
* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
|
||||
* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
|
||||
Add e_log2f-sse2.
|
||||
(CFLAGS-e_log2f-sse2.c): New.
|
||||
* sysdeps/i386/i686/fpu/multiarch/e_log2f-sse2.c: New file.
|
||||
* sysdeps/i386/i686/fpu/multiarch/e_log2f.c: Likewise.
|
||||
|
||||
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines):
|
||||
|
@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Written by J.T. Conklin <jtc@netbsd.org>.
|
||||
* Adapted for use as log2 by Ulrich Drepper <drepper@cygnus.com>.
|
||||
* Public domain.
|
||||
*
|
||||
* Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
|
||||
*/
|
||||
|
||||
#include <machine/asm.h>
|
||||
|
||||
/* Read-only 8-byte double constants referenced by __ieee754_log2f
   through the MO() macro.  */
.section .rodata.cst8,"aM",@progbits,8
|
||||
|
||||
.p2align 3
|
||||
.type one,@object
|
||||
/* 1.0: loaded onto the x87 stack first, subtracted from x to form x-1,
   and left as the multiplier operand for fyl2x / fyl2xp1.  */
one: .double 1.0
|
||||
ASM_SIZE_DIRECTIVE(one)
|
||||
/* It is not important that this constant is precise. It is only
|
||||
a value which is known to be on the safe side for using the
|
||||
fyl2xp1 instruction. */
|
||||
/* |x-1| is compared against this value to choose between the
   fyl2xp1 path and the fyl2x path.  */
.type limit,@object
|
||||
limit: .double 0.29
|
||||
ASM_SIZE_DIRECTIVE(limit)
|
||||
|
||||
|
||||
/* MO(sym): memory operand for one of the local data symbols.
   In PIC builds the symbol is addressed as sym@GOTOFF relative to %edx
   (the function runs LOAD_PIC_REG (dx) before its first MO() use);
   in non-PIC builds the symbol name is used directly.  */
#ifdef PIC
|
||||
# define MO(op) op##@GOTOFF(%edx)
|
||||
#else
|
||||
# define MO(op) op
|
||||
#endif
|
||||
|
||||
.text
|
||||
// __ieee754_log2f: base-2 logarithm of a single-precision value, computed
// on the x87 FPU.  Also exported as __log2f_finite (strong_alias below).
//   In:   x is read as a 32-bit float from 4(%esp).
//   Out:  the result is left in %st(0).
//   Uses: %ax and the CPU flags (fnstsw / sahf / andb / cmpb); in PIC
//         builds %edx holds the base used by MO().
// Strategy: a NaN argument is returned as-is.  Otherwise x-1 is formed;
// if |x-1| is not above `limit' the result comes from fyl2xp1 on x-1,
// else from fyl2x on x.
// Stack pictures in the comments list %st(0) first.
ENTRY(__ieee754_log2f)
|
||||
#ifdef PIC
|
||||
LOAD_PIC_REG (dx) // load the PIC base into %edx for MO()
|
||||
#endif
|
||||
fldl MO(one) // 1
|
||||
flds 4(%esp) // x : 1
|
||||
fxam // classify x into C3/C2/C0
|
||||
fnstsw // FPU status word -> %ax
|
||||
fld %st // x : x : 1
|
||||
sahf // %ah -> flags: C0 -> CF, C2 -> PF, C3 -> ZF
|
||||
jc 3f // in case x is NaN or ±Inf
|
||||
4: fsub %st(2), %st // x-1 : x : 1
|
||||
fld %st // x-1 : x-1 : x : 1
|
||||
fabs // |x-1| : x-1 : x : 1
|
||||
fcompl MO(limit) // compare |x-1| with limit and pop: x-1 : x : 1
|
||||
fnstsw // x-1 : x : 1 (comparison result -> %ax)
|
||||
andb $0x45, %ah // keep C3 (0x40), C2 (0x04), C0 (0x01)
|
||||
jz 2f // all clear: |x-1| > limit, use fyl2x on x
|
||||
fxam // classify x-1
|
||||
fnstsw // classification -> %ax
|
||||
andb $0x45, %ah // keep C3, C2, C0
|
||||
cmpb $0x40, %ah // only C3 set: x-1 is a zero
|
||||
jne 5f // nonzero x-1: keep its sign
|
||||
fabs // log2(1) is +0 in all rounding modes.
|
||||
5: fstp %st(1) // x-1 : 1
|
||||
fyl2xp1 // 1 * log2((x-1) + 1) = log2(x)
|
||||
ret
|
||||
|
||||
2: fstp %st(0) // x : 1
|
||||
fyl2x // 1 * log2(x)
|
||||
ret
|
||||
|
||||
3: jp 4b // in case x is ±Inf
|
||||
fstp %st(1) // x is NaN: x : 1
|
||||
fstp %st(1) // x
|
||||
ret // return the NaN left in %st(0)
|
||||
END (__ieee754_log2f)
|
||||
strong_alias (__ieee754_log2f, __log2f_finite)
|
@ -1 +0,0 @@
|
||||
/* Not needed. */
|
@ -2300,8 +2300,10 @@ ldouble: 3
|
||||
|
||||
Function: "log2":
|
||||
double: 1
|
||||
float: 1
|
||||
float128: 2
|
||||
idouble: 1
|
||||
ifloat: 1
|
||||
ifloat128: 2
|
||||
ildouble: 1
|
||||
ldouble: 1
|
||||
|
@ -1 +0,0 @@
|
||||
#include <sysdeps/../math/w_log2f.c>
|
@ -1,8 +1,9 @@
|
||||
# Extra SSE2 objects added to libm when building in the math subdirectory.
# NOTE: this is a diff hunk shown without +/- markers.  Per the hunk header
# (8 lines before, 9 after) and the ChangeLog entry, the first
# libm-sysdep_routines line is the pre-change text and the second, which
# adds e_log2f-sse2, is its replacement.
ifeq ($(subdir),math)
|
||||
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 \
|
||||
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 e_log2f-sse2 \
|
||||
s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
|
||||
|
||||
# Each listed *-sse2.c source is compiled with SSE2 enabled and SSE used
# for floating-point math.
CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
|
||||
CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
|
||||
CFLAGS-e_log2f-sse2.c = -msse2 -mfpmath=sse
|
||||
CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse
|
||||
endif
|
||||
|
3
sysdeps/i386/i686/fpu/multiarch/e_log2f-sse2.c
Normal file
3
sysdeps/i386/i686/fpu/multiarch/e_log2f-sse2.c
Normal file
@ -0,0 +1,3 @@
|
||||
/* SSE2 variant of log2f: rename __log2f to __log2f_sse2, then include the
   generic flt-32 source so that it is compiled under the new name.  The
   multiarch Makefile builds this file with -msse2 -mfpmath=sse
   (CFLAGS-e_log2f-sse2.c).
   NOTE(review): assumes the generic source defines its entry point as
   __log2f -- confirm in sysdeps/ieee754/flt-32/e_log2f.c.  */
#define __log2f __log2f_sse2
|
||||
|
||||
#include <sysdeps/ieee754/flt-32/e_log2f.c>
|
40
sysdeps/i386/i686/fpu/multiarch/e_log2f.c
Normal file
40
sysdeps/i386/i686/fpu/multiarch/e_log2f.c
Normal file
@ -0,0 +1,40 @@
|
||||
/* Multiple versions of log2f.
|
||||
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Prototype under a private name; it is passed to libc_ifunc_redirected
   and __hidden_ver1 below.  */
extern float __redirect_log2f (float);
|
||||
|
||||
/* SYMBOL_NAME is set before including ifunc-sse2.h, which is expected to
   provide IFUNC_SELECTOR.
   NOTE(review): the header is not visible here; presumably the selector
   chooses between __log2f_sse2 (e_log2f-sse2.c) and __log2f_ia32 (built
   at the end of this file) -- confirm in ifunc-sse2.h.  */
#define SYMBOL_NAME log2f
|
||||
#include "ifunc-sse2.h"
|
||||
|
||||
/* Declare __log2f through libc_ifunc_redirected, with IFUNC_SELECTOR ()
   as the expression that picks the implementation.  */
libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ());
|
||||
|
||||
#ifdef SHARED
|
||||
/* NOTE(review): appears to bind the library-internal name __GI___log2f
   to __log2f_ia32 with hidden visibility -- confirm against the
   __hidden_ver1 definition.  */
__hidden_ver1 (__log2f_ia32, __GI___log2f, __redirect_log2f)
|
||||
__attribute__ ((visibility ("hidden")));
|
||||
|
||||
# include <shlib-compat.h>
|
||||
/* Shared builds: export log2f from libm as a versioned symbol
   (GLIBC_2_27) referring to __log2f.  */
versioned_symbol (libm, __log2f, log2f, GLIBC_2_27);
|
||||
#else
|
||||
/* Non-shared builds: log2f is a weak alias of __log2f.  */
weak_alias (__log2f, log2f)
|
||||
#endif
|
||||
|
||||
/* Keep the names the removed assembly file provided (__ieee754_log2f and
   __log2f_finite) as aliases of __log2f.  */
strong_alias (__log2f, __ieee754_log2f)
|
||||
strong_alias (__log2f, __log2f_finite)
|
||||
|
||||
/* From here on __log2f expands to __log2f_ia32, so the generic source
   included below is compiled under that name.  This must stay after the
   alias and ifunc declarations above, which need the unrenamed __log2f.
   NOTE(review): assumes the generic source defines __log2f -- confirm.  */
#define __log2f __log2f_ia32
|
||||
#include <sysdeps/ieee754/flt-32/e_log2f.c>
|
@ -2300,8 +2300,10 @@ ldouble: 3
|
||||
|
||||
Function: "log2":
|
||||
double: 1
|
||||
float: 1
|
||||
float128: 2
|
||||
idouble: 1
|
||||
ifloat: 1
|
||||
ifloat128: 2
|
||||
ildouble: 1
|
||||
ldouble: 1
|
||||
|
Loading…
Reference in New Issue
Block a user