Optimize x86-32 log

This commit is contained in:
Ulrich Drepper 2011-10-14 23:41:47 -04:00
parent 396a21b1d0
commit 38ad40ceca
7 changed files with 306 additions and 3 deletions

View File

@ -1,5 +1,12 @@
2011-10-14 Ulrich Drepper <drepper@gmail.com>
* sysdeps/i386/fpu/e_log.S: Add real definition of __log_finite.
* sysdeps/i386/fpu/e_logf.S: Add real definition of __logf_finite.
* sysdeps/i386/fpu/e_logl.S: Add real definition of __logl_finite.
* sysdeps/i386/i686/fpu/e_log.S: New file.
* sysdeps/i386/i686/fpu/e_logf.S: New file.
* sysdeps/i386/i686/fpu/e_logl.S: New file.
* ctype/ctype.h: Add support for inlined isXXX functions when
compiling C++ code.

View File

@ -62,4 +62,22 @@ ENTRY(__ieee754_log)
fstp %st(1) fstp %st(1)
ret ret
END (__ieee754_log) END (__ieee754_log)
strong_alias (__ieee754_log, __log_finite)
/* __log_finite: natural logarithm of the double at 4(%esp), returned in
   %st(0).  If |x-1| is not greater than the constant limit, the result
   is log(2) * log2((x-1) + 1) via fyl2xp1.  Otherwise control leaves
   through local label 2, which lies before this entry point and is not
   part of this hunk (presumably the fyl2x path of __ieee754_log --
   confirm against the full file).  */
ENTRY(__log_finite)
fldln2 // log(2)
fldl 4(%esp) // x : log(2)
#ifdef PIC
LOAD_PIC_REG (dx)
#endif
fld %st // x : x : log(2)
fsubl MO(one) // x-1 : x : log(2)
fld %st // x-1 : x-1 : x : log(2)
fabs // |x-1| : x-1 : x : log(2)
fcompl MO(limit) // x-1 : x : log(2)
fnstsw // x-1 : x : log(2)
andb $0x45, %ah // keep C3, C2, C0; all clear means |x-1| > limit
jz 2b // too far from 1 for fyl2xp1: leave through label 2
fstp %st(1) // x-1 : log(2)
fyl2xp1 // log(x)
ret
END(__log_finite)

View File

@ -63,4 +63,22 @@ ENTRY(__ieee754_logf)
fstp %st(1) fstp %st(1)
ret ret
END (__ieee754_logf) END (__ieee754_logf)
strong_alias (__ieee754_logf, __logf_finite)
/* __logf_finite: natural logarithm of the single-precision value at
   4(%esp), returned in %st(0).  If |x-1| is not greater than the
   constant limit, the result is log(2) * log2((x-1) + 1) via fyl2xp1.
   Otherwise control leaves through local label 2, which lies before
   this entry point and is not part of this hunk (presumably the fyl2x
   path of __ieee754_logf -- confirm against the full file).  */
ENTRY(__logf_finite)
fldln2 // log(2)
flds 4(%esp) // x : log(2)
#ifdef PIC
LOAD_PIC_REG (dx)
#endif
fld %st // x : x : log(2)
fsubl MO(one) // x-1 : x : log(2)
fld %st // x-1 : x-1 : x : log(2)
fabs // |x-1| : x-1 : x : log(2)
fcompl MO(limit) // x-1 : x : log(2)
fnstsw // x-1 : x : log(2)
andb $0x45, %ah // keep C3, C2, C0; all clear means |x-1| > limit
jz 2b // too far from 1 for fyl2xp1: leave through label 2
fstp %st(1) // x-1 : log(2)
fyl2xp1 // log(x)
ret
END(__logf_finite)

View File

@ -63,4 +63,22 @@ ENTRY(__ieee754_logl)
fstp %st(1) fstp %st(1)
ret ret
END (__ieee754_logl) END (__ieee754_logl)
strong_alias (__ieee754_logl, __logl_finite)
/* __logl_finite: natural logarithm of the extended-precision value at
   4(%esp), returned in %st(0).  If |x-1| is not greater than the
   constant limit, the result is log(2) * log2((x-1) + 1) via fyl2xp1.
   Otherwise control leaves through local label 2, which lies before
   this entry point and is not part of this hunk (presumably the fyl2x
   path of __ieee754_logl -- confirm against the full file).  */
ENTRY(__logl_finite)
fldln2 // log(2)
fldt 4(%esp) // x : log(2)
#ifdef PIC
LOAD_PIC_REG (dx)
#endif
fld %st // x : x : log(2)
fsubl MO(one) // x-1 : x : log(2)
fld %st // x-1 : x-1 : x : log(2)
fabs // |x-1| : x-1 : x : log(2)
fcompl MO(limit) // x-1 : x : log(2)
fnstsw // x-1 : x : log(2)
andb $0x45, %ah // keep C3, C2, C0; all clear means |x-1| > limit
jz 2b // too far from 1 for fyl2xp1: leave through label 2
fstp %st(1) // x-1 : log(2)
fyl2xp1 // log(x)
ret
END(__logl_finite)

View File

@ -0,0 +1,80 @@
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
*
* Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
* Adapted for i686 instructions.
*/
#include <machine/asm.h>
/* Constant pool.  On ELF the constants go into .rodata.cst8, declared
   with flags "aM" and an entry size of 8; on other formats they are
   emitted into .text.  */
#ifdef __ELF__
.section .rodata.cst8,"aM",@progbits,8
#else
.text
#endif
/* Align to 2^3 = 8 bytes, the size of each .double below.  */
.p2align 3
ASM_TYPE_DIRECTIVE(one,@object)
/* Subtracted from x to form x-1.  */
one: .double 1.0
ASM_SIZE_DIRECTIVE(one)
/* Threshold compared against |x-1| to choose between fyl2xp1 and fyl2x.
   It is not important that this constant is precise.  It is only
   a value which is known to be on the safe side for using the
   fyl2xp1 instruction.  Like one, it is stored as an 8-byte .double.  */
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
/* MO(op): memory operand naming constant op.  In PIC builds it is
   addressed as op@GOTOFF relative to %edx; otherwise by its plain
   symbol name.  */
#ifdef PIC
# define MO(op) op##@GOTOFF(%edx)
#else
# define MO(op) op
#endif
.text
/* __ieee754_log -- natural logarithm of a double (i686 variant).

   In:   x as a double at 4(%esp).
   Out:  result in %st(0).
   Uses: x87 stack and EFLAGS; MO() addresses constants through %edx
         when PIC is defined.

   A NaN argument is returned unchanged.  For |x-1| <= limit the result
   is log(2) * log2((x-1) + 1), computed with fyl2xp1; otherwise it is
   log(2) * log2(x), computed with fyl2x.

   Local label 2 is also the target of the backward jump in
   __log_finite, which follows this function; keep it in place.  */
ENTRY(__ieee754_log)
	fldln2			// log(2)
	fldl	4(%esp)		// x : log(2)
	fucomi	%st		// x against itself: PF is set only for NaN
	jp	3f
#ifdef PIC
	LOAD_PIC_REG (dx)
#endif
	fld	%st		// x : x : log(2)
	fsubl	MO(one)		// x-1 : x : log(2)
	fld	%st		// x-1 : x-1 : x : log(2)
	fabs			// |x-1| : x-1 : x : log(2)
	// limit is stored as a .double, so the load must carry the l
	// suffix; a suffix-less memory fld is not a 64-bit load.
	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
	fstp	%st(0)		// x-1 : x : log(2)   (EFLAGS untouched)
	jc	2f		// CF set: limit < |x-1|
	fstp	%st(1)		// x-1 : log(2)
	fyl2xp1			// log(x)
	ret

	// Far from 1: drop x-1 and use the plain logarithm.
2:	fstp	%st(0)		// x : log(2)
	fyl2x			// log(x)
	ret

	// NaN: discard log(2) and return x itself.
3:	fstp	%st(1)
	ret
END (__ieee754_log)
/* __log_finite -- natural logarithm of a double, without the NaN test
   performed by __ieee754_log.

   In:   x as a double at 4(%esp).
   Out:  result in %st(0).
   Uses: x87 stack and EFLAGS; MO() addresses constants through %edx
         when PIC is defined.

   Arguments with |x-1| <= limit are finished here with fyl2xp1.  All
   others jump back to local label 2 inside __ieee754_log (the function
   directly above), which finishes with fyl2x.  */
ENTRY(__log_finite)
	fldln2			// log(2)
	fldl	4(%esp)		// x : log(2)
#ifdef PIC
	LOAD_PIC_REG (dx)
#endif
	fld	%st		// x : x : log(2)
	fsubl	MO(one)		// x-1 : x : log(2)
	fld	%st		// x-1 : x-1 : x : log(2)
	fabs			// |x-1| : x-1 : x : log(2)
	// limit is stored as a .double, so the load must carry the l
	// suffix; a suffix-less memory fld is not a 64-bit load.
	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
	fstp	%st(0)		// x-1 : x : log(2)   (EFLAGS untouched)
	jc	2b		// CF set: limit < |x-1|, use the fyl2x path
	fstp	%st(1)		// x-1 : log(2)
	fyl2xp1			// log(x)
	ret
END(__log_finite)

View File

@ -0,0 +1,81 @@
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
* Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
*
* Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
* Adapted for i686 instructions.
*/
#include <machine/asm.h>
/* Constant pool.  On ELF the constants go into .rodata.cst8, declared
   with flags "aM" and an entry size of 8; on other formats they are
   emitted into .text.  */
#ifdef __ELF__
.section .rodata.cst8,"aM",@progbits,8
#else
.text
#endif
/* Align to 2^3 = 8 bytes, the size of each .double below.  */
.p2align 3
ASM_TYPE_DIRECTIVE(one,@object)
/* Subtracted from x to form x-1.  */
one: .double 1.0
ASM_SIZE_DIRECTIVE(one)
/* Threshold compared against |x-1| to choose between fyl2xp1 and fyl2x.
   It is not important that this constant is precise.  It is only
   a value which is known to be on the safe side for using the
   fyl2xp1 instruction.  Like one, it is stored as an 8-byte .double.  */
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
/* MO(op): memory operand naming constant op.  In PIC builds it is
   addressed as op@GOTOFF relative to %edx; otherwise by its plain
   symbol name.  */
#ifdef PIC
# define MO(op) op##@GOTOFF(%edx)
#else
# define MO(op) op
#endif
.text
/* __ieee754_logf -- natural logarithm of a float (i686 variant).

   In:   x as a single-precision value at 4(%esp).
   Out:  result in %st(0).
   Uses: x87 stack and EFLAGS; MO() addresses constants through %edx
         when PIC is defined.

   A NaN argument is returned unchanged.  For |x-1| <= limit the result
   is log(2) * log2((x-1) + 1), computed with fyl2xp1; otherwise it is
   log(2) * log2(x), computed with fyl2x.

   Local label 2 is also the target of the backward jump in
   __logf_finite, which follows this function; keep it in place.  */
ENTRY(__ieee754_logf)
	fldln2			// log(2)
	flds	4(%esp)		// x : log(2)
	fucomi	%st		// x against itself: PF is set only for NaN
	jp	3f
#ifdef PIC
	LOAD_PIC_REG (dx)
#endif
	fld	%st		// x : x : log(2)
	fsubl	MO(one)		// x-1 : x : log(2)
	fld	%st		// x-1 : x-1 : x : log(2)
	fabs			// |x-1| : x-1 : x : log(2)
	// limit is stored as a .double, so the load must carry the l
	// suffix; a suffix-less memory fld is not a 64-bit load.
	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
	fstp	%st(0)		// x-1 : x : log(2)   (EFLAGS untouched)
	jc	2f		// CF set: limit < |x-1|
	fstp	%st(1)		// x-1 : log(2)
	fyl2xp1			// log(x)
	ret

	// Far from 1: drop x-1 and use the plain logarithm.
2:	fstp	%st(0)		// x : log(2)
	fyl2x			// log(x)
	ret

	// NaN: discard log(2) and return x itself.
3:	fstp	%st(1)
	ret
END (__ieee754_logf)
/* __logf_finite -- natural logarithm of a float, without the NaN test
   performed by __ieee754_logf.

   In:   x as a single-precision value at 4(%esp).
   Out:  result in %st(0).
   Uses: x87 stack and EFLAGS; MO() addresses constants through %edx
         when PIC is defined.

   Arguments with |x-1| <= limit are finished here with fyl2xp1.  All
   others jump back to local label 2 inside __ieee754_logf (the function
   directly above), which finishes with fyl2x.  */
ENTRY(__logf_finite)
	fldln2			// log(2)
	flds	4(%esp)		// x : log(2)
#ifdef PIC
	LOAD_PIC_REG (dx)
#endif
	fld	%st		// x : x : log(2)
	fsubl	MO(one)		// x-1 : x : log(2)
	fld	%st		// x-1 : x-1 : x : log(2)
	fabs			// |x-1| : x-1 : x : log(2)
	// limit is stored as a .double, so the load must carry the l
	// suffix; a suffix-less memory fld is not a 64-bit load.
	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
	fstp	%st(0)		// x-1 : x : log(2)   (EFLAGS untouched)
	jc	2b		// CF set: limit < |x-1|, use the fyl2x path
	fstp	%st(1)		// x-1 : log(2)
	fyl2xp1			// log(x)
	ret
END(__logf_finite)

View File

@ -0,0 +1,81 @@
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
*
* Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
* Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
* Adapted for i686 instructions.
*/
#include <machine/asm.h>
/* Constant pool.  On ELF the constants go into .rodata.cst8, declared
   with flags "aM" and an entry size of 8; on other formats they are
   emitted into .text.  */
#ifdef __ELF__
.section .rodata.cst8,"aM",@progbits,8
#else
.text
#endif
/* Align to 2^3 = 8 bytes, the size of each .double below.  */
.p2align 3
ASM_TYPE_DIRECTIVE(one,@object)
/* Subtracted from x to form x-1.  */
one: .double 1.0
ASM_SIZE_DIRECTIVE(one)
/* Threshold compared against |x-1| to choose between fyl2xp1 and fyl2x.
   It is not important that this constant is precise.  It is only
   a value which is known to be on the safe side for using the
   fyl2xp1 instruction.  Like one, it is stored as an 8-byte .double.  */
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
/* MO(op): memory operand naming constant op.  In PIC builds it is
   addressed as op@GOTOFF relative to %edx; otherwise by its plain
   symbol name.  */
#ifdef PIC
# define MO(op) op##@GOTOFF(%edx)
#else
# define MO(op) op
#endif
.text
/* __ieee754_logl -- natural logarithm of a long double (i686 variant).

   In:   x as an extended-precision value at 4(%esp).
   Out:  result in %st(0).
   Uses: x87 stack and EFLAGS; MO() addresses constants through %edx
         when PIC is defined.

   A NaN argument is returned unchanged.  For |x-1| <= limit the result
   is log(2) * log2((x-1) + 1), computed with fyl2xp1; otherwise it is
   log(2) * log2(x), computed with fyl2x.

   Local label 2 is also the target of the backward jump in
   __logl_finite, which follows this function; keep it in place.  */
ENTRY(__ieee754_logl)
	fldln2			// log(2)
	fldt	4(%esp)		// x : log(2)
	fucomi	%st		// x against itself: PF is set only for NaN
	jp	3f
#ifdef PIC
	LOAD_PIC_REG (dx)
#endif
	fld	%st		// x : x : log(2)
	fsubl	MO(one)		// x-1 : x : log(2)
	fld	%st		// x-1 : x-1 : x : log(2)
	fabs			// |x-1| : x-1 : x : log(2)
	// limit is stored as a .double, so the load must carry the l
	// suffix; a suffix-less memory fld is not a 64-bit load.
	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
	fstp	%st(0)		// x-1 : x : log(2)   (EFLAGS untouched)
	jc	2f		// CF set: limit < |x-1|
	fstp	%st(1)		// x-1 : log(2)
	fyl2xp1			// log(x)
	ret

	// Far from 1: drop x-1 and use the plain logarithm.
2:	fstp	%st(0)		// x : log(2)
	fyl2x			// log(x)
	ret

	// NaN: discard log(2) and return x itself.
3:	fstp	%st(1)
	ret
END (__ieee754_logl)
/* __logl_finite -- natural logarithm of a long double, without the NaN
   test performed by __ieee754_logl.

   In:   x as an extended-precision value at 4(%esp).
   Out:  result in %st(0).
   Uses: x87 stack and EFLAGS; MO() addresses constants through %edx
         when PIC is defined.

   Arguments with |x-1| <= limit are finished here with fyl2xp1.  All
   others jump back to local label 2 inside __ieee754_logl (the function
   directly above), which finishes with fyl2x.  */
ENTRY(__logl_finite)
	fldln2			// log(2)
	fldt	4(%esp)		// x : log(2)
#ifdef PIC
	LOAD_PIC_REG (dx)
#endif
	fld	%st		// x : x : log(2)
	fsubl	MO(one)		// x-1 : x : log(2)
	fld	%st		// x-1 : x-1 : x : log(2)
	fabs			// |x-1| : x-1 : x : log(2)
	// limit is stored as a .double, so the load must carry the l
	// suffix; a suffix-less memory fld is not a 64-bit load.
	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
	fstp	%st(0)		// x-1 : x : log(2)   (EFLAGS untouched)
	jc	2b		// CF set: limit < |x-1|, use the fyl2x path
	fstp	%st(1)		// x-1 : log(2)
	fyl2xp1			// log(x)
	ret
END(__logl_finite)