mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-11 07:40:05 +00:00
Another tweak to the multiplication algorithm
Reduce the formula to calculate mantissa so that we reduce the net number of multiplications performed.
This commit is contained in:
parent
2236d3595a
commit
45f058844c
@ -1,5 +1,8 @@
|
|||||||
2013-02-26 Siddhesh Poyarekar <siddhesh@redhat.com>
|
2013-02-26 Siddhesh Poyarekar <siddhesh@redhat.com>
|
||||||
|
|
||||||
|
* sysdeps/ieee754/dbl-64/mpa.c: Include alloca.h.
|
||||||
|
(__mul): Reduce iterations for calculating mantissa.
|
||||||
|
|
||||||
* sysdeps/ieee754/dbl-64/sincos32.c (__c32): Use MPONE and
|
* sysdeps/ieee754/dbl-64/sincos32.c (__c32): Use MPONE and
|
||||||
MPTWO.
|
MPTWO.
|
||||||
(__mpranred): Likewise.
|
(__mpranred): Likewise.
|
||||||
|
@ -43,6 +43,7 @@
|
|||||||
#include "endian.h"
|
#include "endian.h"
|
||||||
#include "mpa.h"
|
#include "mpa.h"
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
|
#include <alloca.h>
|
||||||
|
|
||||||
#ifndef SECTION
|
#ifndef SECTION
|
||||||
# define SECTION
|
# define SECTION
|
||||||
@ -621,6 +622,7 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
|||||||
long p2 = p;
|
long p2 = p;
|
||||||
double u, zk;
|
double u, zk;
|
||||||
const mp_no *a;
|
const mp_no *a;
|
||||||
|
double *diag;
|
||||||
|
|
||||||
/* Is z=0? */
|
/* Is z=0? */
|
||||||
if (__glibc_unlikely (X[0] * Y[0] == ZERO))
|
if (__glibc_unlikely (X[0] * Y[0] == ZERO))
|
||||||
@ -673,12 +675,33 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
|||||||
while (k > ip + ip2 + 1)
|
while (k > ip + ip2 + 1)
|
||||||
Z[k--] = ZERO;
|
Z[k--] = ZERO;
|
||||||
|
|
||||||
zk = Z[k] = ZERO;
|
zk = ZERO;
|
||||||
|
|
||||||
|
/* Precompute sums of diagonal elements so that we can directly use them
|
||||||
|
later. See the next comment to know we why need them. */
|
||||||
|
diag = alloca (k * sizeof (double));
|
||||||
|
double d = ZERO;
|
||||||
|
for (i = 1; i <= ip; i++)
|
||||||
|
{
|
||||||
|
d += X[i] * Y[i];
|
||||||
|
diag[i] = d;
|
||||||
|
}
|
||||||
|
while (i < k)
|
||||||
|
diag[i++] = d;
|
||||||
|
|
||||||
while (k > p2)
|
while (k > p2)
|
||||||
{
|
{
|
||||||
for (i = k - p2, j = p2; i < p2 + 1; i++, j--)
|
long lim = k / 2;
|
||||||
zk += X[i] * Y[j];
|
|
||||||
|
if (k % 2 == 0)
|
||||||
|
/* We want to add this only once, but since we subtract it in the sum
|
||||||
|
of products above, we add twice. */
|
||||||
|
zk += 2 * X[lim] * Y[lim];
|
||||||
|
|
||||||
|
for (i = k - p2, j = p2; i < j; i++, j--)
|
||||||
|
zk += (X[i] + X[j]) * (Y[i] + Y[j]);
|
||||||
|
|
||||||
|
zk -= diag[k - 1];
|
||||||
|
|
||||||
u = (zk + CUTTER) - CUTTER;
|
u = (zk + CUTTER) - CUTTER;
|
||||||
if (u > zk)
|
if (u > zk)
|
||||||
@ -687,11 +710,32 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
|||||||
zk = u * RADIXI;
|
zk = u * RADIXI;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The real deal. */
|
/* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
|
||||||
|
goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the
|
||||||
|
number of multiplications, we halve the range and if k is an even number,
|
||||||
|
add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute
|
||||||
|
X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
|
||||||
|
|
||||||
|
This reduction tells us that we're summing two things, the first term
|
||||||
|
through the half range and the negative of the sum of the product of all
|
||||||
|
terms of X and Y in the full range. i.e.
|
||||||
|
|
||||||
|
SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in
|
||||||
|
a single loop so that it completes in O(n) time and can hence be directly
|
||||||
|
used in the loop below. */
|
||||||
while (k > 1)
|
while (k > 1)
|
||||||
{
|
{
|
||||||
for (i = 1, j = k - 1; i < k; i++, j--)
|
long lim = k / 2;
|
||||||
zk += X[i] * Y[j];
|
|
||||||
|
if (k % 2 == 0)
|
||||||
|
/* We want to add this only once, but since we subtract it in the sum
|
||||||
|
of products above, we add twice. */
|
||||||
|
zk += 2 * X[lim] * Y[lim];
|
||||||
|
|
||||||
|
for (i = 1, j = k - 1; i < j; i++, j--)
|
||||||
|
zk += (X[i] + X[j]) * (Y[i] + Y[j]);
|
||||||
|
|
||||||
|
zk -= diag[k - 1];
|
||||||
|
|
||||||
u = (zk + CUTTER) - CUTTER;
|
u = (zk + CUTTER) - CUTTER;
|
||||||
if (u > zk)
|
if (u > zk)
|
||||||
|
Loading…
Reference in New Issue
Block a user