i386: Use generic exp10f

The generic implementation is faster: in the figures below, reciprocal
throughput drops by about 28% and latency by about 21%.  Using the exp10f
benchmark:

 * master:
  "exp10f": {
   "workload-spec2017.wrf (adapted)": {
    "duration": 1.02967e+09,
    "iterations": 4.768e+07,
    "reciprocal-throughput": 18.3579,
    "latency": 24.8331,
    "max-throughput": 5.44725e+07,
    "min-throughput": 4.02688e+07
   }
  }

 * patched:
  "exp10f": {
   "workload-spec2017.wrf (adapted)": {
    "duration": 1.01821e+09,
    "iterations": 6.1984e+07,
    "reciprocal-throughput": 13.1975,
    "latency": 19.6563,
    "max-throughput": 7.57719e+07,
    "min-throughput": 5.08743e+07
   }
  }

Checked on i686-linux-gnu.
This commit is contained in:
Adhemerval Zanella 2020-04-08 17:42:46 -03:00
parent 6e98983c09
commit 4b2d8e4442

View File

@ -1,54 +0,0 @@
/*
* Written by Ulrich Drepper.
*/
#include <machine/asm.h>
#include <i386-math-asm.h>
#include <libm-alias-finite.h>
/* DEFINE_FLT_MIN is a macro defined outside this file.
   NOTE(review): presumably it emits the FLT_MIN constant consumed by the
   underflow-check macro used in the function below -- confirm against
   <i386-math-asm.h>.  */
DEFINE_FLT_MIN
/* MO(op) builds a memory operand for the symbol OP.  In a PIC build the
   symbol is addressed GOT-relative through %ecx (op@GOTOFF(%ecx)), so
   %ecx must be set up before any MO() operand is used; otherwise the
   bare symbol name is used directly.  */
#ifdef PIC
# define MO(op) op##@GOTOFF(%ecx)
#else
# define MO(op) op
#endif
/* Everything that follows is assembled into the code section.  */
.text
/* __ieee754_exp10f: compute 10^x for a single-precision argument using
   the x87 FPU, via the identity

     10^x = 2^(x * log2(10))

   In:    the float argument is read from 4(%esp).
   Out:   the result is left in %st(0).
   Uses:  %eax and %dh as scratch for the FPU status word; in PIC builds
          LOAD_PIC_REG is invoked on cx (NOTE(review): presumably it loads
          the GOT base into %ecx for MO() operands -- confirm).

   With y = x * log2(10), y is split into int(y) + fract(y); f2xm1
   supplies 2^fract(y) and fscale applies the 2^int(y) factor.
   +-Inf is handled on a separate path (label 1) because, per the
   original author's note below, the arithmetic path produced NaN.  */
ENTRY(__ieee754_exp10f)
#ifdef PIC
LOAD_PIC_REG (cx)
#endif
/* st(0) = x */
flds 4(%esp)
/* I added the following ugly construct because exp(+-Inf) resulted
in NaN. The ugliness results from the bright minds at Intel.
For the i686 the code can be written better.
-- drepper@cygnus.com. */
/* fxam classifies st(0) into condition codes C3/C2/C0 and puts the sign
   in C1.  After fstsw %ax these sit in %ah as C0=0x01, C1=0x02, C2=0x04
   and C3=0x40, so the mask 0x45 keeps C3|C2|C0 and the value 0x05
   (C2|C0) identifies an infinity.  A NaN (C0 only) does not match and
   falls through to the arithmetic path.  */
fxam /* Is NaN or +-Inf? */
fstsw %ax
movb $0x45, %dh
andb %ah, %dh
cmpb $0x05, %dh
je 1f /* Is +-Inf, jump. */
/* st(0) = log2(10), st(1) = x */
fldl2t
fmulp /* x * log2(10) */
/* Duplicate y = x * log2(10): st(0) = y, st(1) = y */
fld %st
frndint /* int(x * log2(10)) */
/* st(0) = int(y), st(1) = fract(y) */
fsubr %st,%st(1) /* fract(x * log2(10)) */
/* Swap so that fract(y) is on top for f2xm1: st(0) = fract(y),
   st(1) = int(y) */
fxch
f2xm1 /* 2^(fract(x * log2(10))) - 1 */
fld1
faddp /* 2^(fract(x * log2(10))) */
/* Scale by 2^int(y) taken from st(1): st(0) = 2^fract(y) * 2^int(y) */
fscale /* 10^x */
/* Store the result over int(y) and pop, leaving only the result on the
   FPU stack.  */
fstp %st(1)
/* Macro defined outside this file.  NOTE(review): presumably narrows the
   result to float and forces an underflow exception for tiny results --
   confirm against <i386-math-asm.h>.  */
FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
ret
/* Infinity path.  %eax still holds the status word from fstsw; bit 9
   (0x200) is C1, the sign reported by fxam.  */
1: testl $0x200, %eax /* Test sign. */
/* +Inf: the argument is still in st(0) and is returned unchanged.  */
jz 2f /* If positive, jump. */
/* -Inf: discard the argument and return +0.0 instead.  */
fstp %st
fldz /* Set result to 0. */
2: ret
END (__ieee754_exp10f)
/* Alias macro defined outside this file.  NOTE(review): presumably makes
   the function available under the __exp10f finite-math name -- confirm
   against <libm-alias-finite.h>.  */
libm_alias_finite (__ieee754_exp10f, __exp10f)