Better exp polynomial

The lesser the __mul calls, the better it is for performance.
This commit is contained in:
Siddhesh Poyarekar 2013-02-13 14:49:50 +05:30
parent 909279a5cf
commit 4e92d59e26
2 changed files with 40 additions and 23 deletions

View File

@ -1,5 +1,8 @@
2013-02-13 Siddhesh Poyarekar <siddhesh@redhat.com>
* sysdeps/ieee754/dbl-64/mpexp.c (__mpexp): Faster polynomial
evaluation.
* sysdeps/ieee754/dbl-64/mpa.c (__mul): Don't bother with zero
values in the mantissa.

View File

@ -49,6 +49,15 @@ __mpexp (mp_no *x, mp_no *y, int p)
0, 0, 0, 0, 3, 3, 4, 4, 5, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8
};
/* Factorials for the values of np above. */
static const double nfa[33] =
{
1.0, 1.0, 1.0, 1.0, 6.0, 6.0, 24.0, 24.0, 120.0, 24.0, 24.0,
120.0, 120.0, 120.0, 720.0, 720.0, 720.0, 720.0, 720.0, 720.0,
720.0, 720.0, 720.0, 720.0, 5040.0, 5040.0, 5040.0, 5040.0,
40320.0, 40320.0, 40320.0, 40320.0, 40320.0
};
static const int m1p[33] =
{
0, 0, 0, 0,
@ -71,16 +80,7 @@ __mpexp (mp_no *x, mp_no *y, int p)
{0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 39, 43, 47, 51, 55, 59, 63},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 47, 50, 54}
};
mp_no mpk =
{
0,
{
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
}
};
mp_no mps, mpak, mpt1, mpt2;
mp_no mps, mpk, mpt1, mpt2;
/* Choose m,n and compute a=2**(-m). */
n = np[p];
@ -115,24 +115,38 @@ __mpexp (mp_no *x, mp_no *y, int p)
break;
}
/* Compute s=x*2**(-m). Put result in mps. */
/* Compute s=x*2**(-m). Put result in mps. This is the range-reduced input
that we will use to compute e^s. For the final result, simply raise it
to 2^m. */
__pow_mp (-m, &mpt1, p);
__mul (x, &mpt1, &mps, p);
/* Evaluate the polynomial. Put result in mpt2. */
mpk.e = 1;
mpk.d[0] = ONE;
mpk.d[1] = n;
__dvd (&mps, &mpk, &mpt1, p);
__add (&mpone, &mpt1, &mpak, p);
for (k = n - 1; k > 1; k--)
/* Compute the Taylor series for e^s:
1 + x/1! + x^2/2! + x^3/3! ...
for N iterations. We compute this as:
e^x = 1 + (x * n!/1! + x^2 * n!/2! + x^3 * n!/3!) / n!
= 1 + (x * (n!/1! + x * (n!/2! + x * (n!/3! + x ...)))) / n!
n! is pre-computed and saved while k! is computed on the fly. */
__cpy (&mps, &mpt2, p);
double kf = 1.0;
/* Evaluate the rest. The result will be in mpt2. */
for (k = n - 1; k > 0; k--)
{
__mul (&mps, &mpak, &mpt1, p);
mpk.d[1] = k;
__dvd (&mpt1, &mpk, &mpt2, p);
__add (&mpone, &mpt2, &mpak, p);
/* n! / k! = n * (n - 1) ... * (n - k + 1) */
kf *= k + 1;
__dbl_mp (kf, &mpk, p);
__add (&mpt2, &mpk, &mpt1, p);
__mul (&mps, &mpt1, &mpt2, p);
}
__mul (&mps, &mpak, &mpt1, p);
__dbl_mp (nfa[p], &mpk, p);
__dvd (&mpt2, &mpk, &mpt1, p);
__add (&mpone, &mpt1, &mpt2, p);
/* Raise polynomial value to the power of 2**m. Put result in y. */