i386: Replace assembly versions of e_log2f with generic e_log2f.c

This patch replaces i386 assembly versions of e_log2f with generic
e_log2f.c.  For workload-spec2017.wrf, on Nehalem, it improves
performance by:

                           Before            After     Improvement
reciprocal-throughput      92.3845          30.8752       199%
latency                    112.855          54.8645       105%

On Skylake, it improves performance by:

                           Before            After     Improvement
reciprocal-throughput      98.7488          22.7507       334%
latency                    118.01           51.6083       128%

On IvyBridge with --disable-multi-arch, it improves performance by:

                           Before            After     Improvement
reciprocal-throughput      106.635          28.8596       269%
latency                    129.888          56.9187       128%

	* sysdeps/i386/fpu/e_log2f.S: Removed.
	* sysdeps/i386/fpu/e_log2f_data.c: Likewise.
	* sysdeps/i386/fpu/w_log2f.c: Likewise.
	* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_log2f.c.
	* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add e_log2f-sse2.
	(CFLAGS-e_log2f-sse2.c): New.
	* sysdeps/i386/i686/fpu/multiarch/e_log2f-sse2.c: New file.
	* sysdeps/i386/i686/fpu/multiarch/e_log2f.c: Likewise.
This commit is contained in:
H.J. Lu 2017-10-22 08:09:08 -07:00
parent 80bb593563
commit 6089a3ee24
9 changed files with 62 additions and 72 deletions

View File

@ -1,3 +1,16 @@
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/i386/fpu/e_log2f.S: Removed.
* sysdeps/i386/fpu/e_log2f_data.c: Likewise.
* sysdeps/i386/fpu/w_log2f.c: Likewise.
* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_log2f.c.
* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
Add e_log2f-sse2.
(CFLAGS-e_log2f-sse2.c): New.
* sysdeps/i386/i686/fpu/multiarch/e_log2f-sse2.c: New file.
* sysdeps/i386/i686/fpu/multiarch/e_log2f.c: Likewise.
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines):

View File

@ -1,69 +0,0 @@
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Adapted for use as log2 by Ulrich Drepper <drepper@cygnus.com>.
* Public domain.
*
* Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
*/
#include <machine/asm.h>
/* Read-only floating-point constants used by __ieee754_log2f.  They live
   in a mergeable constant section with 8-byte entries and are aligned to
   8 bytes (.p2align 3).  */
.section .rodata.cst8,"aM",@progbits,8
.p2align 3
/* 1.0: subtracted from x to form x-1, and kept on the x87 stack as the
   multiplier operand consumed by fyl2x / fyl2xp1.  */
.type one,@object
one: .double 1.0
ASM_SIZE_DIRECTIVE(one)
/* Threshold compared against |x-1| to choose between fyl2xp1 and fyl2x.
   It is not important that this constant is precise. It is only
a value which is known to be on the safe side for using the
fyl2xp1 instruction. */
.type limit,@object
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
/* MO(op) builds the memory operand for one of the constants above: in PIC
   builds the symbol is addressed as a @GOTOFF offset from %edx, otherwise
   by its plain symbol name.  */
#ifdef PIC
# define MO(op) op##@GOTOFF(%edx)
#else
# define MO(op) op
#endif
.text
/* __ieee754_log2f: base-2 logarithm of a single-precision value, computed
   on the x87 FPU.

   In:    x, loaded as a single-precision float from 4(%esp).
   Out:   result left in %st(0).
   Uses:  %ax and the CPU flags (fnstsw / sahf / andb / cmpb);
          %edx as well when PIC is defined.

   Paths:
     - x is NaN:           x itself is returned.
     - |x-1| >  limit:     fyl2x   computes 1 * log2(x).
     - otherwise:          fyl2xp1 computes 1 * log2((x-1) + 1); when x-1
                           is zero its sign is cleared first so that the
                           result is +0.

   The trailing comments show the x87 stack, top of stack first.  */
ENTRY(__ieee754_log2f)
#ifdef PIC
/* MO() addresses the constants relative to %edx, so set it up first.  */
LOAD_PIC_REG (dx)
#endif
fldl MO(one)
flds 4(%esp) // x : 1
/* Classify x and copy the condition bits into the CPU flags via %ah.  */
fxam
fnstsw
fld %st // x : x : 1
sahf
jc 3f // in case x is NaN or ±Inf
4: fsub %st(2), %st // x-1 : x : 1
fld %st // x-1 : x-1 : x : 1
fabs // |x-1| : x-1 : x : 1
fcompl MO(limit) // x-1 : x : 1
fnstsw // x-1 : x : 1
/* 0x45 keeps only the C3, C2 and C0 condition bits.  All three clear
   means |x-1| compared greater than limit, so take the fyl2x path.  */
andb $0x45, %ah
jz 2f
/* Near 1: check whether x-1 is exactly zero (fxam reports C3 alone,
   i.e. 0x40, for a zero operand).  */
fxam
fnstsw
andb $0x45, %ah
cmpb $0x40, %ah
jne 5f
fabs // log2(1) is +0 in all rounding modes.
5: fstp %st(1) // x-1 : 1
fyl2xp1 // log2(x)
ret
/* Far from 1: discard x-1 and take the logarithm of x directly.  */
2: fstp %st(0) // x : 1
fyl2x // log2(x)
ret
/* x is NaN or ±Inf.  Infinities go back to the normal path at 4; for a
   NaN the two spare stack entries are dropped and x is returned.  */
3: jp 4b // in case x is ±Inf
fstp %st(1)
fstp %st(1)
ret
END (__ieee754_log2f)
/* __log2f_finite is provided as an alias of the same entry point.  */
strong_alias (__ieee754_log2f, __log2f_finite)

View File

@ -1 +0,0 @@
/* Not needed. */

View File

@ -2300,8 +2300,10 @@ ldouble: 3
Function: "log2":
double: 1
float: 1
float128: 2
idouble: 1
ifloat: 1
ifloat128: 2
ildouble: 1
ldouble: 1

View File

@ -1 +0,0 @@
#include <sysdeps/../math/w_log2f.c>

View File

@ -1,8 +1,9 @@
# Only when building the math subdirectory: add the *-sse2 variants of the
# float routines to libm-sysdep_routines.
ifeq ($(subdir),math)
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 \
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 e_log2f-sse2 \
s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
# Per-file flags: compile each of these sources with SSE2 enabled and with
# SSE used for floating-point math.
CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_log2f-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse
endif

View File

@ -0,0 +1,3 @@
/* Compile the generic flt-32 log2f source with __log2f renamed, so that
   whatever the included file defines as __log2f is emitted under the
   name __log2f_sse2 instead.  */
#define __log2f __log2f_sse2
#include <sysdeps/ieee754/flt-32/e_log2f.c>

View File

@ -0,0 +1,40 @@
/* Multiple versions of log2f.
Copyright (C) 2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Declaration under a private name; it is handed to the
   libc_ifunc_redirected and __hidden_ver1 macros below.  */
extern float __redirect_log2f (float);
/* SYMBOL_NAME must be defined before ifunc-sse2.h is included.
   NOTE(review): presumably the header derives the per-variant symbol
   names used by IFUNC_SELECTOR from it -- confirm in ifunc-sse2.h.  */
#define SYMBOL_NAME log2f
#include "ifunc-sse2.h"
/* Define __log2f through the ifunc macro, with the implementation chosen
   by IFUNC_SELECTOR ().  */
libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ());
#ifdef SHARED
/* SHARED builds: declare a hidden-visibility symbol tying __log2f_ia32 to
   __GI___log2f.  NOTE(review): exact __hidden_ver1 semantics are defined
   outside this file -- confirm there.  */
__hidden_ver1 (__log2f_ia32, __GI___log2f, __redirect_log2f)
__attribute__ ((visibility ("hidden")));
# include <shlib-compat.h>
/* Export __log2f from libm as log2f at symbol version GLIBC_2_27.  */
versioned_symbol (libm, __log2f, log2f, GLIBC_2_27);
#else
/* Non-SHARED builds: log2f is simply a weak alias of __log2f.  */
weak_alias (__log2f, log2f)
#endif
/* __ieee754_log2f and __log2f_finite are both aliases of __log2f.  These
   must appear before __log2f is redefined as a macro below.  */
strong_alias (__log2f, __ieee754_log2f)
strong_alias (__log2f, __log2f_finite)
/* From here on __log2f expands to __log2f_ia32, so the generic
   implementation included next is compiled under that name.  */
#define __log2f __log2f_ia32
#include <sysdeps/ieee754/flt-32/e_log2f.c>

View File

@ -2300,8 +2300,10 @@ ldouble: 3
Function: "log2":
double: 1
float: 1
float128: 2
idouble: 1
ifloat: 1
ifloat128: 2
ildouble: 1
ldouble: 1