mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-11 11:50:06 +00:00
Optimized mp multiplication
Don't bother multiplying zeroes since that only wastes cycles.
This commit is contained in:
parent
bdf028142e
commit
909279a5cf
@ -1,5 +1,8 @@
|
||||
2013-02-13 Siddhesh Poyarekar <siddhesh@redhat.com>
|
||||
|
||||
* sysdeps/ieee754/dbl-64/mpa.c (__mul): Don't bother with zero
|
||||
values in the mantissa.
|
||||
|
||||
* sysdeps/ieee754/dbl-64/mpa.c (add_magnitudes): Use ZK to
|
||||
minimize writes to Z.
|
||||
(sub_magnitudes): Simplify code a bit.
|
||||
|
@ -610,7 +610,7 @@ void
|
||||
SECTION
|
||||
__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
||||
{
|
||||
int i, j, k, k2;
|
||||
int i, j, k, ip, ip2;
|
||||
double u, zk;
|
||||
|
||||
/* Is z=0? */
|
||||
@ -620,11 +620,51 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Multiply, add and carry. */
|
||||
k2 = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
|
||||
zk = Z[k2] = ZERO;
|
||||
/* We need not iterate through all X's and Y's since it's pointless to
|
||||
multiply zeroes. Here, both are zero... */
|
||||
for (ip2 = p; ip2 > 0; ip2--)
|
||||
if (X[ip2] != ZERO || Y[ip2] != ZERO)
|
||||
break;
|
||||
|
||||
for (k = k2; k > p; k--)
|
||||
/* ... and here, at least one of them is still zero. */
|
||||
for (ip = ip2; ip > 0; ip--)
|
||||
if (X[ip] * Y[ip] != ZERO)
|
||||
break;
|
||||
|
||||
/* The product looks like this for p = 3 (as an example):
|
||||
|
||||
|
||||
a1 a2 a3
|
||||
x b1 b2 b3
|
||||
-----------------------------
|
||||
a1*b3 a2*b3 a3*b3
|
||||
a1*b2 a2*b2 a3*b2
|
||||
a1*b1 a2*b1 a3*b1
|
||||
|
||||
So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
|
||||
for P >= 3. We compute the above digits in two parts; the last P-1
|
||||
digits and then the first P digits. The last P-1 digits are a sum of
|
||||
products of the input digits from P to P-k where K is 0 for the least
|
||||
significant digit and increases as we go towards the left. The product
|
||||
term is of the form X[k]*X[P-k] as can be seen in the above example.
|
||||
|
||||
The first P digits are also a sum of products with the same product term,
|
||||
except that the sum is from 1 to k. This is also evident from the above
|
||||
example.
|
||||
|
||||
Another thing that becomes evident is that only the most significant
|
||||
ip+ip2 digits of the result are non-zero, where ip and ip2 are the
|
||||
'internal precision' of the input numbers, i.e. digits after ip and ip2
|
||||
are all ZERO. */
|
||||
|
||||
k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
|
||||
|
||||
while (k > ip + ip2 + 1)
|
||||
Z[k--] = ZERO;
|
||||
|
||||
zk = Z[k] = ZERO;
|
||||
|
||||
while (k > p)
|
||||
{
|
||||
for (i = k - p, j = p; i < p + 1; i++, j--)
|
||||
zk += X[i] * Y[j];
|
||||
@ -632,10 +672,11 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
||||
u = (zk + CUTTER) - CUTTER;
|
||||
if (u > zk)
|
||||
u -= RADIX;
|
||||
Z[k] = zk - u;
|
||||
Z[k--] = zk - u;
|
||||
zk = u * RADIXI;
|
||||
}
|
||||
|
||||
/* The real deal. */
|
||||
while (k > 1)
|
||||
{
|
||||
for (i = 1, j = k - 1; i < k; i++, j--)
|
||||
@ -644,9 +685,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
|
||||
u = (zk + CUTTER) - CUTTER;
|
||||
if (u > zk)
|
||||
u -= RADIX;
|
||||
Z[k] = zk - u;
|
||||
Z[k--] = zk - u;
|
||||
zk = u * RADIXI;
|
||||
k--;
|
||||
}
|
||||
Z[k] = zk;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user