added libtommath-0.07
This commit is contained in:
parent
16c6ccc62c
commit
3cd7000342
2
b.bat
2
b.bat
@ -1,3 +1,3 @@
|
|||||||
nasm -f coff timer.asm
|
nasm -f coff timer.asm
|
||||||
gcc -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c bn.c timer.o -o ltmdemo
|
gcc -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c bn.c timer.o -o ltmdemo
|
||||||
gcc -I./mtest/ -DU_MPI -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c mtest/mpi.c timer.o -o mpidemo
|
rem gcc -I./mtest/ -DU_MPI -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c mtest/mpi.c timer.o -o mpidemo
|
||||||
|
237
bn.c
237
bn.c
@ -849,8 +849,7 @@ static int s_mp_sub(mp_int *a, mp_int *b, mp_int *c)
|
|||||||
*/
|
*/
|
||||||
static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||||
{
|
{
|
||||||
mp_int t;
|
int olduse, res, pa, pb, ix, iy;
|
||||||
int res, pa, pb, ix, iy;
|
|
||||||
mp_word W[512], *_W;
|
mp_word W[512], *_W;
|
||||||
mp_digit tmpx, *tmpy;
|
mp_digit tmpx, *tmpy;
|
||||||
|
|
||||||
@ -859,11 +858,12 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
|||||||
VERIFY(b);
|
VERIFY(b);
|
||||||
VERIFY(c);
|
VERIFY(c);
|
||||||
|
|
||||||
if ((res = mp_init_size(&t, digs)) != MP_OKAY) {
|
if (c->alloc < digs) {
|
||||||
DECFUNC();
|
if ((res = mp_grow(c, digs)) != MP_OKAY) {
|
||||||
return res;
|
DECFUNC();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
t.used = digs;
|
|
||||||
|
|
||||||
/* clear temp buf (the columns) */
|
/* clear temp buf (the columns) */
|
||||||
memset(W, 0, digs*sizeof(mp_word));
|
memset(W, 0, digs*sizeof(mp_word));
|
||||||
@ -893,6 +893,11 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* setup dest */
|
||||||
|
olduse = c->used;
|
||||||
|
c->used = digs;
|
||||||
|
|
||||||
|
|
||||||
/* At this point W[] contains the sums of each column. To get the
|
/* At this point W[] contains the sums of each column. To get the
|
||||||
* correct result we must take the extra bits from each column and
|
* correct result we must take the extra bits from each column and
|
||||||
* carry them down
|
* carry them down
|
||||||
@ -904,14 +909,17 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
|||||||
* this is slower but on most cryptographic size numbers it is faster.
|
* this is slower but on most cryptographic size numbers it is faster.
|
||||||
*/
|
*/
|
||||||
for (ix = 1; ix < digs; ix++) {
|
for (ix = 1; ix < digs; ix++) {
|
||||||
W[ix] = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT));
|
W[ix] = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT));
|
||||||
t.dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK);
|
c->dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK);
|
||||||
|
}
|
||||||
|
c->dp[digs-1] = W[digs-1] & ((mp_word)MP_MASK);
|
||||||
|
|
||||||
|
/* clear unused */
|
||||||
|
for (ix = c->used; ix < olduse; ix++) {
|
||||||
|
c->dp[ix] = 0;
|
||||||
}
|
}
|
||||||
t.dp[digs-1] = W[digs-1] & ((mp_word)MP_MASK);
|
|
||||||
|
|
||||||
mp_clamp(&t);
|
mp_clamp(c);
|
||||||
mp_exch(&t, c);
|
|
||||||
mp_clear(&t);
|
|
||||||
DECFUNC();
|
DECFUNC();
|
||||||
return MP_OKAY;
|
return MP_OKAY;
|
||||||
}
|
}
|
||||||
@ -993,8 +1001,7 @@ static int s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
|||||||
*/
|
*/
|
||||||
static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||||
{
|
{
|
||||||
mp_int t;
|
int oldused, newused, res, pa, pb, ix, iy;
|
||||||
int res, pa, pb, ix, iy;
|
|
||||||
mp_word W[512], *_W;
|
mp_word W[512], *_W;
|
||||||
mp_digit tmpx, *tmpy;
|
mp_digit tmpx, *tmpy;
|
||||||
|
|
||||||
@ -1003,11 +1010,13 @@ static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
|||||||
VERIFY(b);
|
VERIFY(b);
|
||||||
VERIFY(c);
|
VERIFY(c);
|
||||||
|
|
||||||
if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) {
|
newused = a->used + b->used + 1;
|
||||||
DECFUNC();
|
if (c->alloc < newused) {
|
||||||
return res;
|
if ((res = mp_grow(c, newused)) != MP_OKAY) {
|
||||||
|
DECFUNC();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
t.used = a->used + b->used + 1;
|
|
||||||
|
|
||||||
/* like the other comba method we compute the columns first */
|
/* like the other comba method we compute the columns first */
|
||||||
pa = a->used;
|
pa = a->used;
|
||||||
@ -1025,17 +1034,21 @@ static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* setup dest */
|
||||||
|
oldused = c->used;
|
||||||
|
c->used = newused;
|
||||||
|
|
||||||
/* now convert the array W downto what we need */
|
/* now convert the array W downto what we need */
|
||||||
for (ix = digs+1; ix < (pa+pb+1); ix++) {
|
for (ix = digs+1; ix < (pa+pb+1); ix++) {
|
||||||
W[ix] = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT));
|
W[ix] = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT));
|
||||||
t.dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK);
|
c->dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK);
|
||||||
}
|
}
|
||||||
t.dp[(pa+pb+1)-1] = W[(pa+pb+1)-1] & ((mp_word)MP_MASK);
|
c->dp[(pa+pb+1)-1] = W[(pa+pb+1)-1] & ((mp_word)MP_MASK);
|
||||||
|
|
||||||
|
for (ix = c->used; ix < oldused; ix++) {
|
||||||
mp_clamp(&t);
|
c->dp[ix] = 0;
|
||||||
mp_exch(&t, c);
|
}
|
||||||
mp_clear(&t);
|
mp_clamp(c);
|
||||||
DECFUNC();
|
DECFUNC();
|
||||||
return MP_OKAY;
|
return MP_OKAY;
|
||||||
}
|
}
|
||||||
@ -1106,8 +1119,7 @@ static int s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
|||||||
*/
|
*/
|
||||||
static int fast_s_mp_sqr(mp_int *a, mp_int *b)
|
static int fast_s_mp_sqr(mp_int *a, mp_int *b)
|
||||||
{
|
{
|
||||||
mp_int t;
|
int olduse, newused, res, ix, iy, pa;
|
||||||
int res, ix, iy, pa;
|
|
||||||
mp_word W2[512], W[512], *_W;
|
mp_word W2[512], W[512], *_W;
|
||||||
mp_digit tmpx, *tmpy;
|
mp_digit tmpx, *tmpy;
|
||||||
|
|
||||||
@ -1116,11 +1128,13 @@ static int fast_s_mp_sqr(mp_int *a, mp_int *b)
|
|||||||
VERIFY(b);
|
VERIFY(b);
|
||||||
|
|
||||||
pa = a->used;
|
pa = a->used;
|
||||||
if ((res = mp_init_size(&t, pa + pa + 1)) != MP_OKAY) {
|
newused = pa + pa + 1;
|
||||||
DECFUNC();
|
if (b->alloc < newused) {
|
||||||
return res;
|
if ((res = mp_grow(b, newused)) != MP_OKAY) {
|
||||||
}
|
DECFUNC();
|
||||||
t.used = pa + pa + 1;
|
return res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* zero temp buffer (columns) */
|
/* zero temp buffer (columns) */
|
||||||
memset(W, 0, (pa+pa+1)*sizeof(mp_word));
|
memset(W, 0, (pa+pa+1)*sizeof(mp_word));
|
||||||
@ -1144,19 +1158,29 @@ static int fast_s_mp_sqr(mp_int *a, mp_int *b)
|
|||||||
/* double first value, since the inner products are half of what they should be */
|
/* double first value, since the inner products are half of what they should be */
|
||||||
W[0] += W[0] + W2[0];
|
W[0] += W[0] + W2[0];
|
||||||
|
|
||||||
|
/* setup dest */
|
||||||
|
olduse = b->used;
|
||||||
|
b->used = newused;
|
||||||
|
|
||||||
/* now compute digits */
|
/* now compute digits */
|
||||||
for (ix = 1; ix < (pa+pa+1); ix++) {
|
for (ix = 1; ix < (pa+pa+1); ix++) {
|
||||||
/* double/add next digit */
|
/* double/add next digit */
|
||||||
W[ix] += W[ix] + W2[ix];
|
W[ix] += W[ix] + W2[ix];
|
||||||
|
|
||||||
W[ix] = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT));
|
W[ix] = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT));
|
||||||
t.dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK);
|
b->dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK);
|
||||||
}
|
}
|
||||||
t.dp[(pa+pa+1)-1] = W[(pa+pa+1)-1] & ((mp_word)MP_MASK);
|
b->dp[(pa+pa+1)-1] = W[(pa+pa+1)-1] & ((mp_word)MP_MASK);
|
||||||
|
|
||||||
mp_clamp(&t);
|
/* clear high */
|
||||||
mp_exch(&t, b);
|
for (ix = b->used; ix < olduse; ix++) {
|
||||||
mp_clear(&t);
|
b->dp[ix] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* fix the sign (since we no longer make a fresh temp) */
|
||||||
|
b->sign = MP_ZPOS;
|
||||||
|
|
||||||
|
mp_clamp(b);
|
||||||
DECFUNC();
|
DECFUNC();
|
||||||
return MP_OKAY;
|
return MP_OKAY;
|
||||||
}
|
}
|
||||||
@ -1173,13 +1197,13 @@ static int s_mp_sqr(mp_int *a, mp_int *b)
|
|||||||
VERIFY(a);
|
VERIFY(a);
|
||||||
VERIFY(b);
|
VERIFY(b);
|
||||||
|
|
||||||
/* can we use the fast multiplier? */
|
/* can we use the fast multiplier? */
|
||||||
if (((a->used * 2 + 1) < 512) && a->used < (1<<( (CHAR_BIT*sizeof(mp_word)) - (2*DIGIT_BIT) - 1))) {
|
if (((a->used * 2 + 1) < 512) && a->used < (1<<( (CHAR_BIT*sizeof(mp_word)) - (2*DIGIT_BIT) - 1))) {
|
||||||
res = fast_s_mp_sqr(a,b);
|
res = fast_s_mp_sqr(a,b);
|
||||||
DECFUNC();
|
DECFUNC();
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
pa = a->used;
|
pa = a->used;
|
||||||
if ((res = mp_init_size(&t, pa + pa + 1)) != MP_OKAY) {
|
if ((res = mp_init_size(&t, pa + pa + 1)) != MP_OKAY) {
|
||||||
DECFUNC();
|
DECFUNC();
|
||||||
@ -1385,10 +1409,9 @@ static int mp_karatsuba_mul(mp_int *a, mp_int *b, mp_int *c)
|
|||||||
if (mp_lshd(&x1y1, B*2) != MP_OKAY) goto X1Y1; /* x1y1 = x1y1 << 2*B */
|
if (mp_lshd(&x1y1, B*2) != MP_OKAY) goto X1Y1; /* x1y1 = x1y1 << 2*B */
|
||||||
|
|
||||||
if (mp_add(&x0y0, &t1, &t1) != MP_OKAY) goto X1Y1; /* t1 = x0y0 + t1 */
|
if (mp_add(&x0y0, &t1, &t1) != MP_OKAY) goto X1Y1; /* t1 = x0y0 + t1 */
|
||||||
if (mp_add(&t1, &x1y1, &t1) != MP_OKAY) goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
|
if (mp_add(&t1, &x1y1, c) != MP_OKAY) goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
|
||||||
|
|
||||||
err = MP_OKAY;
|
err = MP_OKAY;
|
||||||
mp_exch(&t1, c);
|
|
||||||
|
|
||||||
X1Y1: mp_clear(&x1y1);
|
X1Y1: mp_clear(&x1y1);
|
||||||
X0Y0: mp_clear(&x0y0);
|
X0Y0: mp_clear(&x0y0);
|
||||||
@ -1426,7 +1449,7 @@ int mp_mul(mp_int *a, mp_int *b, mp_int *c)
|
|||||||
static int mp_karatsuba_sqr(mp_int *a, mp_int *b)
|
static int mp_karatsuba_sqr(mp_int *a, mp_int *b)
|
||||||
{
|
{
|
||||||
mp_int x0, x1, t1, t2, x0x0, x1x1;
|
mp_int x0, x1, t1, t2, x0x0, x1x1;
|
||||||
int B, err;
|
int B, err, x;
|
||||||
|
|
||||||
REGFUNC("mp_karatsuba_sqr");
|
REGFUNC("mp_karatsuba_sqr");
|
||||||
VERIFY(a);
|
VERIFY(a);
|
||||||
@ -1441,8 +1464,8 @@ static int mp_karatsuba_sqr(mp_int *a, mp_int *b)
|
|||||||
B = B/2;
|
B = B/2;
|
||||||
|
|
||||||
/* init copy all the temps */
|
/* init copy all the temps */
|
||||||
if (mp_init_copy(&x0, a) != MP_OKAY) goto ERR;
|
if (mp_init_size(&x0, B) != MP_OKAY) goto ERR;
|
||||||
if (mp_init_copy(&x1, a) != MP_OKAY) goto X0;
|
if (mp_init_size(&x1, a->used - B) != MP_OKAY) goto X0;
|
||||||
|
|
||||||
/* init temps */
|
/* init temps */
|
||||||
if (mp_init(&t1) != MP_OKAY) goto X1;
|
if (mp_init(&t1) != MP_OKAY) goto X1;
|
||||||
@ -1451,16 +1474,27 @@ static int mp_karatsuba_sqr(mp_int *a, mp_int *b)
|
|||||||
if (mp_init(&x1x1) != MP_OKAY) goto X0X0;
|
if (mp_init(&x1x1) != MP_OKAY) goto X0X0;
|
||||||
|
|
||||||
/* now shift the digits */
|
/* now shift the digits */
|
||||||
mp_mod_2d(&x0, B*DIGIT_BIT, &x0);
|
for (x = 0; x < B; x++) {
|
||||||
mp_rshd(&x1, B);
|
x0.dp[x] = a->dp[x];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (x = B; x < a->used; x++) {
|
||||||
|
x1.dp[x-B] = a->dp[x];
|
||||||
|
}
|
||||||
|
|
||||||
|
x0.used = B;
|
||||||
|
x1.used = a->used - B;
|
||||||
|
|
||||||
|
mp_clamp(&x0);
|
||||||
|
mp_clamp(&x1);
|
||||||
|
|
||||||
/* now calc the products x0*x0 and x1*x1 */
|
/* now calc the products x0*x0 and x1*x1 */
|
||||||
if (s_mp_sqr(&x0, &x0x0) != MP_OKAY) goto X1X1; /* x0x0 = x0*x0 */
|
if (mp_sqr(&x0, &x0x0) != MP_OKAY) goto X1X1; /* x0x0 = x0*x0 */
|
||||||
if (s_mp_sqr(&x1, &x1x1) != MP_OKAY) goto X1X1; /* x1x1 = x1*x1 */
|
if (mp_sqr(&x1, &x1x1) != MP_OKAY) goto X1X1; /* x1x1 = x1*x1 */
|
||||||
|
|
||||||
/* now calc x1-x0 and y1-y0 */
|
/* now calc x1-x0 and y1-y0 */
|
||||||
if (mp_sub(&x1, &x0, &t1) != MP_OKAY) goto X1X1; /* t1 = x1 - x0 */
|
if (mp_sub(&x1, &x0, &t1) != MP_OKAY) goto X1X1; /* t1 = x1 - x0 */
|
||||||
if (s_mp_sqr(&t1, &t1) != MP_OKAY) goto X1X1; /* t1 = (x1 - x0) * (y1 - y0) */
|
if (mp_sqr(&t1, &t1) != MP_OKAY) goto X1X1; /* t1 = (x1 - x0) * (y1 - y0) */
|
||||||
|
|
||||||
/* add x0y0 */
|
/* add x0y0 */
|
||||||
if (mp_add(&x0x0, &x1x1, &t2) != MP_OKAY) goto X1X1; /* t2 = x0y0 + x1y1 */
|
if (mp_add(&x0x0, &x1x1, &t2) != MP_OKAY) goto X1X1; /* t2 = x0y0 + x1y1 */
|
||||||
@ -1471,10 +1505,9 @@ static int mp_karatsuba_sqr(mp_int *a, mp_int *b)
|
|||||||
if (mp_lshd(&x1x1, B*2) != MP_OKAY) goto X1X1; /* x1y1 = x1y1 << 2*B */
|
if (mp_lshd(&x1x1, B*2) != MP_OKAY) goto X1X1; /* x1y1 = x1y1 << 2*B */
|
||||||
|
|
||||||
if (mp_add(&x0x0, &t1, &t1) != MP_OKAY) goto X1X1; /* t1 = x0y0 + t1 */
|
if (mp_add(&x0x0, &t1, &t1) != MP_OKAY) goto X1X1; /* t1 = x0y0 + t1 */
|
||||||
if (mp_add(&t1, &x1x1, &t1) != MP_OKAY) goto X1X1; /* t1 = x0y0 + t1 + x1y1 */
|
if (mp_add(&t1, &x1x1, b) != MP_OKAY) goto X1X1; /* t1 = x0y0 + t1 + x1y1 */
|
||||||
|
|
||||||
err = MP_OKAY;
|
err = MP_OKAY;
|
||||||
mp_exch(&t1, b);
|
|
||||||
|
|
||||||
X1X1: mp_clear(&x1x1);
|
X1X1: mp_clear(&x1x1);
|
||||||
X0X0: mp_clear(&x0x0);
|
X0X0: mp_clear(&x0x0);
|
||||||
@ -2784,6 +2817,102 @@ __M :
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* find the n'th root of an integer
|
||||||
|
*
|
||||||
|
* Result found such that (c)^b <= a and (c+1)^b > a
|
||||||
|
*/
|
||||||
|
int mp_n_root(mp_int *a, mp_digit b, mp_int *c)
|
||||||
|
{
|
||||||
|
mp_int t1, t2, t3;
|
||||||
|
int res, neg;
|
||||||
|
|
||||||
|
/* input must be positive if b is even*/
|
||||||
|
if ((b&1) == 0 && a->sign == MP_NEG) {
|
||||||
|
return MP_VAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((res = mp_init(&t1)) != MP_OKAY) {
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((res = mp_init(&t2)) != MP_OKAY) {
|
||||||
|
goto __T1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((res = mp_init(&t3)) != MP_OKAY) {
|
||||||
|
goto __T2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if a is negative fudge the sign but keep track */
|
||||||
|
neg = a->sign;
|
||||||
|
a->sign = MP_ZPOS;
|
||||||
|
|
||||||
|
/* t2 = a */
|
||||||
|
if ((res = mp_copy(a, &t2)) != MP_OKAY) {
|
||||||
|
goto __T3;
|
||||||
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
|
/* t1 = t2 */
|
||||||
|
if ((res = mp_copy(&t2, &t1)) != MP_OKAY) {
|
||||||
|
goto __T3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* t2 = t1 - ((t1^b - a) / (b * t1^(b-1))) */
|
||||||
|
if ((res = mp_expt_d(&t1, b-1, &t3)) != MP_OKAY) { /* t3 = t1^(b-1) */
|
||||||
|
goto __T3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* numerator */
|
||||||
|
if ((res = mp_mul(&t3, &t1, &t2)) != MP_OKAY) { /* t2 = t1^b */
|
||||||
|
goto __T3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((res = mp_sub(&t2, a, &t2)) != MP_OKAY) { /* t2 = t1^b - a */
|
||||||
|
goto __T3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((res = mp_mul_d(&t3, b, &t3)) != MP_OKAY) { /* t3 = t1^(b-1) * b */
|
||||||
|
goto __T3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((res = mp_div(&t2, &t3, &t3, NULL)) != MP_OKAY) { /* t3 = (t1^b - a)/(b * t1^(b-1)) */
|
||||||
|
goto __T3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((res = mp_sub(&t1, &t3, &t2)) != MP_OKAY) {
|
||||||
|
goto __T3;
|
||||||
|
}
|
||||||
|
} while (mp_cmp(&t1, &t2) != MP_EQ);
|
||||||
|
|
||||||
|
/* result can be at most off by one so check */
|
||||||
|
if ((res = mp_expt_d(&t1, b, &t2)) != MP_OKAY) {
|
||||||
|
goto __T3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mp_cmp(&t2, a) == MP_GT) {
|
||||||
|
if ((res = mp_sub_d(&t1, 1, &t1)) != MP_OKAY) {
|
||||||
|
goto __T3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* reset the sign of a first */
|
||||||
|
a->sign = neg;
|
||||||
|
|
||||||
|
/* set the result */
|
||||||
|
mp_exch(&t1, c);
|
||||||
|
|
||||||
|
/* set the sign of the result */
|
||||||
|
c->sign = neg;
|
||||||
|
|
||||||
|
res = MP_OKAY;
|
||||||
|
|
||||||
|
__T3: mp_clear(&t3);
|
||||||
|
__T2: mp_clear(&t2);
|
||||||
|
__T1: mp_clear(&t1);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
/* --> radix conversion <-- */
|
/* --> radix conversion <-- */
|
||||||
/* reverse an array, used for radix code */
|
/* reverse an array, used for radix code */
|
||||||
static void reverse(unsigned char *s, int len)
|
static void reverse(unsigned char *s, int len)
|
||||||
|
9
bn.h
9
bn.h
@ -233,6 +233,15 @@ int mp_gcd(mp_int *a, mp_int *b, mp_int *c);
|
|||||||
/* c = [a, b] or (a*b)/(a, b) */
|
/* c = [a, b] or (a*b)/(a, b) */
|
||||||
int mp_lcm(mp_int *a, mp_int *b, mp_int *c);
|
int mp_lcm(mp_int *a, mp_int *b, mp_int *c);
|
||||||
|
|
||||||
|
/* finds one of the b'th root of a, such that |c|^b <= |a|
|
||||||
|
*
|
||||||
|
* returns error if a < 0 and b is even
|
||||||
|
*/
|
||||||
|
int mp_n_root(mp_int *a, mp_digit b, mp_int *c);
|
||||||
|
|
||||||
|
/* shortcut for square root */
|
||||||
|
#define mp_sqrt(a, b) mp_n_root(a, 2, b)
|
||||||
|
|
||||||
/* used to setup the Barrett reduction for a given modulus b */
|
/* used to setup the Barrett reduction for a given modulus b */
|
||||||
int mp_reduce_setup(mp_int *a, mp_int *b);
|
int mp_reduce_setup(mp_int *a, mp_int *b);
|
||||||
|
|
||||||
|
107
bn.tex
107
bn.tex
@ -1,7 +1,7 @@
|
|||||||
\documentclass{article}
|
\documentclass{article}
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
|
||||||
\title{LibTomMath v0.06 \\ A Free Multiple Precision Integer Library}
|
\title{LibTomMath v0.07 \\ A Free Multiple Precision Integer Library}
|
||||||
\author{Tom St Denis \\ tomstdenis@iahu.ca}
|
\author{Tom St Denis \\ tomstdenis@iahu.ca}
|
||||||
\maketitle
|
\maketitle
|
||||||
\newpage
|
\newpage
|
||||||
@ -187,17 +187,6 @@ int mp_mul_2(mp_int *a, mp_int *b);
|
|||||||
int mp_mod_2d(mp_int *a, int b, mp_int *c);
|
int mp_mod_2d(mp_int *a, int b, mp_int *c);
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
Both the \textbf{mp\_rshd} and \textbf{mp\_lshd} functions provide shifting by whole digits. For example,
|
|
||||||
mp\_rshd($x$, $n$) is the same as $x \leftarrow \lfloor x / \beta^n \rfloor$ while mp\_lshd($x$, $n$) is equivalent
|
|
||||||
to $x \leftarrow x \cdot \beta^n$. Both functions are extremely fast as they merely copy digits within the array.
|
|
||||||
|
|
||||||
Similarly the \textbf{mp\_div\_2d} and \textbf{mp\_mul\_2d} functions provide shifting but allow any bit count to
|
|
||||||
be specified. For example, mp\_div\_2d($x$, $n$, $y$) is the same as $y =\lfloor x / 2^n \rfloor$ while
|
|
||||||
mp\_mul\_2d($x$, $n$, $y$) is the same as $y = x \cdot 2^n$. The \textbf{mp\_div\_2} and \textbf{mp\_mul\_2}
|
|
||||||
functions are legacy functions that merely shift right or left one bit respectively. The \textbf{mp\_mod\_2d} function
|
|
||||||
reduces an integer mod a power of two. For example, mp\_mod\_2d($x$, $n$, $y$) is the same as
|
|
||||||
$y \equiv x \mbox{ (mod }2^n\mbox{)}$.
|
|
||||||
|
|
||||||
\subsection{Basic Arithmetic}
|
\subsection{Basic Arithmetic}
|
||||||
|
|
||||||
Next are the class of functions which provide basic arithmetic.
|
Next are the class of functions which provide basic arithmetic.
|
||||||
@ -234,17 +223,7 @@ int mp_div(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
|
|||||||
int mp_mod(mp_int *a, mp_int *b, mp_int *c);
|
int mp_mod(mp_int *a, mp_int *b, mp_int *c);
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
The \textbf{mp\_cmp} will compare two integers. It will return \textbf{MP\_LT} if the first parameter is less than
|
\subsection{Single Digit Functions}
|
||||||
the second, \textbf{MP\_GT} if it is greater or \textbf{MP\_EQ} if they are equal. These constants are the same as from
|
|
||||||
MPI.
|
|
||||||
|
|
||||||
The \textbf{mp\_add}, \textbf{mp\_sub}, \textbf{mp\_mul}, \textbf{mp\_div}, \textbf{mp\_sqr} and \textbf{mp\_mod} are all
|
|
||||||
fairly straight forward to understand. Note that in mp\_div either $c$ (the quotient) or $d$ (the remainder) can be
|
|
||||||
passed as NULL to ignore it. For example, if you only want the quotient $z = \lfloor x/y \rfloor$ then a call such as
|
|
||||||
mp\_div(\&x, \&y, \&z, NULL) is acceptable.
|
|
||||||
|
|
||||||
There is a related class of ``single digit'' functions that are like the above except they use a digit as the second
|
|
||||||
operand.
|
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
/* compare against a single digit */
|
/* compare against a single digit */
|
||||||
@ -296,14 +275,13 @@ int mp_gcd(mp_int *a, mp_int *b, mp_int *c);
|
|||||||
/* c = [a, b] or (a*b)/(a, b) */
|
/* c = [a, b] or (a*b)/(a, b) */
|
||||||
int mp_lcm(mp_int *a, mp_int *b, mp_int *c);
|
int mp_lcm(mp_int *a, mp_int *b, mp_int *c);
|
||||||
|
|
||||||
|
/* find the b'th root of a */
|
||||||
|
int mp_n_root(mp_int *a, mp_digit b, mp_int *c);
|
||||||
|
|
||||||
/* d = a^b (mod c) */
|
/* d = a^b (mod c) */
|
||||||
int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
|
int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
These are all fairly simple to understand. The \textbf{mp\_invmod} is a modular multiplicative inverse. That is it
|
|
||||||
stores in the third parameter an integer such that $ac \equiv 1 \mbox{ (mod }b\mbox{)}$ provided such integer exists. If
|
|
||||||
there is no such integer the function returns \textbf{MP\_VAL}.
|
|
||||||
|
|
||||||
\subsection{Radix Conversions}
|
\subsection{Radix Conversions}
|
||||||
To read or store integers in other formats there are the following functions.
|
To read or store integers in other formats there are the following functions.
|
||||||
|
|
||||||
@ -432,7 +410,7 @@ when $b \le 0$, in theory the routine will still give a properly congruent answe
|
|||||||
|
|
||||||
This function requires $O(4 \cdot N)$ memory and $O(3 \cdot N^2)$ time.
|
This function requires $O(4 \cdot N)$ memory and $O(3 \cdot N^2)$ time.
|
||||||
|
|
||||||
\subsection{Modular Arithmetic}
|
\subsection{Number Theoretic Functions}
|
||||||
|
|
||||||
\subsubsection{mp\_addmod, mp\_submod, mp\_mulmod, mp\_sqrmod}
|
\subsubsection{mp\_addmod, mp\_submod, mp\_mulmod, mp\_sqrmod}
|
||||||
These functions take the time of their host function plus the time it takes to perform a division. For example,
|
These functions take the time of their host function plus the time it takes to perform a division. For example,
|
||||||
@ -445,6 +423,41 @@ Also note that these functions use mp\_mod which means the result are guaranteed
|
|||||||
This function will find $c = 1/a \mbox{ (mod }b\mbox{)}$ for any value of $a$ such that $(a, b) = 1$ and $b > 0$. When
|
This function will find $c = 1/a \mbox{ (mod }b\mbox{)}$ for any value of $a$ such that $(a, b) = 1$ and $b > 0$. When
|
||||||
$b$ is odd a ``fast'' variant is used which finds the inverse twice as fast.
|
$b$ is odd a ``fast'' variant is used which finds the inverse twice as fast.
|
||||||
|
|
||||||
|
\subsubsection{mp\_gcd(mp\_int *a, mp\_int *b, mp\_int *c)}
|
||||||
|
Finds the greatest common divisor of both $a$ and $b$ and places the result in $c$. Will work with either positive
|
||||||
|
or negative inputs.
|
||||||
|
|
||||||
|
Functions requires no additional memory and approximately $O(N \cdot log(N))$ time.
|
||||||
|
|
||||||
|
\subsubsection{mp\_lcm(mp\_int *a, mp\_int *b, mp\_int *c)}
|
||||||
|
Finds the least common multiple of both $a$ and $b$ and places the result in $c$. Will work with either positive
|
||||||
|
or negative inputs. This is calculated by dividing the product of $a$ and $b$ by the greatest common divisor of
|
||||||
|
both.
|
||||||
|
|
||||||
|
Functions requires no additional memory and approximately $O(4 \cdot N^2)$ time.
|
||||||
|
|
||||||
|
\subsubsection{mp\_n\_root(mp\_int *a, mp\_digit b, mp\_int c)}
|
||||||
|
Finds the $b$'th root of $a$ and stores it in $b$. The roots are found such that $\vert c \vert^b \le \vert a \vert$.
|
||||||
|
Uses the Newton approximation approach which means it converges in $O(log \beta^N)$ time to a final result. Each iteration
|
||||||
|
requires $b$ multiplications and one division for a total work of $O(6N^2 \cdot log \beta^N) = O(6N^3 \cdot log \beta)$.
|
||||||
|
|
||||||
|
If the input $a$ is negative and $b$ is even the function returns an error. Otherwise the function will return a root
|
||||||
|
that has a sign that agrees with the sign of $a$.
|
||||||
|
|
||||||
|
\subsubsection{mp\_exptmod(mp\_int *a, mp\_int *b, mp\_int *c, mp\_int *d)}
|
||||||
|
Computes $d = a^b \mbox{ (mod }c\mbox{)}$ using a sliding window $k$-ary exponentiation algorithm. For an $\alpha$-bit
|
||||||
|
exponent it performs $\alpha$ squarings and at most $\lfloor \alpha/k \rfloor + k$ multiplications. The value of $k$ is
|
||||||
|
chosen to minimize the number of multiplications required for a given value of $\alpha$. Barrett reductions are used
|
||||||
|
to reduce the squared or multiplied temporary results modulo $c$. A Barrett reduction requires one division that is
|
||||||
|
performed only and two half multipliers of $N$ digit numbers resulting in approximation $O((N^2)/2)$ work.
|
||||||
|
|
||||||
|
Let $\gamma = \lfloor \alpha/k \rfloor + k$ represent the total multiplications. The total work of a exponentiation is
|
||||||
|
therefore, $O(3 \cdot N^2 + (\alpha + \gamma) \cdot ((N^2)/2) + \alpha \cdot ((N^2 + N)/2) + \gamma \cdot N^2)$ which
|
||||||
|
simplies to $O(3 \cdot N^2 + \gamma N^2 + \alpha (N^2 + (N/2)))$. The bulk of the time is spent in the Barrett
|
||||||
|
reductions and the squaring algorithms. Since $\gamma < \alpha$ it makes sense to optimize first the Barrett and
|
||||||
|
squaring routines first. Significant improvements in the future will most likely stem from optimizing these instead
|
||||||
|
of optimizing the multipliers.
|
||||||
|
|
||||||
\section{Timing Analysis}
|
\section{Timing Analysis}
|
||||||
\subsection{Observed Timings}
|
\subsection{Observed Timings}
|
||||||
A simple test program ``demo.c'' was developed which builds with either MPI or LibTomMath (without modification). The
|
A simple test program ``demo.c'' was developed which builds with either MPI or LibTomMath (without modification). The
|
||||||
@ -467,27 +480,27 @@ Inversion & 1024 & 5,237,957 & 1,054,158 \\
|
|||||||
Inversion & 2048 & 17,871,944 & 3,459,683 \\
|
Inversion & 2048 & 17,871,944 & 3,459,683 \\
|
||||||
Inversion & 4096 & 66,610,468 & 11,834,556 \\
|
Inversion & 4096 & 66,610,468 & 11,834,556 \\
|
||||||
\hline
|
\hline
|
||||||
Multiply & 128 & 1,426 & 828 \\
|
Multiply & 128 & 1,426 & 451 \\
|
||||||
Multiply & 256 & 2,551 & 1,393 \\
|
Multiply & 256 & 2,551 & 958 \\
|
||||||
Multiply & 512 & 7,913 & 2,926 \\
|
Multiply & 512 & 7,913 & 2,476 \\
|
||||||
Multiply & 1024 & 28,496 & 8,620 \\
|
Multiply & 1024 & 28,496 & 7,927 \\
|
||||||
Multiply & 2048 & 109,897 & 28,967 \\
|
Multiply & 2048 & 109,897 & 282,24 \\
|
||||||
Multiply & 4096 & 469,970 & 105,387 \\
|
Multiply & 4096 & 469,970 & 104,681 \\
|
||||||
\hline
|
\hline
|
||||||
Square & 128 & 1,319 & 869 \\
|
Square & 128 & 1,319 & 511 \\
|
||||||
Square & 256 & 1,776 & 1,362 \\
|
Square & 256 & 1,776 & 947 \\
|
||||||
Square & 512 & 5,399 & 2,571 \\
|
Square & 512 & 5,399 & 2,153 \\
|
||||||
Square & 1024 & 18,991 & 6,332 \\
|
Square & 1024 & 18,991 & 5,733 \\
|
||||||
Square & 2048 & 72,126 & 18,426 \\
|
Square & 2048 & 72,126 & 17,621 \\
|
||||||
Square & 4096 & 306,269 & 74,908 \\
|
Square & 4096 & 306,269 & 70,168 \\
|
||||||
\hline
|
\hline
|
||||||
Exptmod & 512 & 32,021,586 & 5,696,459 \\
|
Exptmod & 512 & 32,021,586 & 4,472,406 \\
|
||||||
Exptmod & 768 & 97,595,492 & 12,428,274 \\
|
Exptmod & 768 & 97,595,492 & 10,427,845 \\
|
||||||
Exptmod & 1024 & 223,302,532 & 22,834,316 \\
|
Exptmod & 1024 & 223,302,532 & 20,561,722 \\
|
||||||
Exptmod & 2048 & 1,682,223,369 & 119,888,049 \\
|
Exptmod & 2048 & 1,682,223,369 & 113,978,803 \\
|
||||||
Exptmod & 2560 & 3,268,615,571 & 250,901,263 \\
|
Exptmod & 2560 & 3,268,615,571 & 236,650,133 \\
|
||||||
Exptmod & 3072 & 5,597,240,141 & 391,716,431 \\
|
Exptmod & 3072 & 5,597,240,141 & 373,449,291 \\
|
||||||
Exptmod & 4096 & 13,347,270,891 & 814,429,647
|
Exptmod & 4096 & 13,347,270,891 & 787,568,457
|
||||||
|
|
||||||
\end{tabular}
|
\end{tabular}
|
||||||
\end{center}
|
\end{center}
|
||||||
|
@ -1,3 +1,8 @@
|
|||||||
|
Jan 1st, 2003
|
||||||
|
v0.07 -- Removed alot of heap operations from core functions to speed them up
|
||||||
|
-- Added a root finding function [and mp_sqrt macro like from MPI]
|
||||||
|
-- Added more to manual
|
||||||
|
|
||||||
Dec 31st, 2002
|
Dec 31st, 2002
|
||||||
v0.06 -- Sped up the s_mp_add, s_mp_sub which inturn sped up mp_invmod, mp_exptmod, etc...
|
v0.06 -- Sped up the s_mp_add, s_mp_sub which inturn sped up mp_invmod, mp_exptmod, etc...
|
||||||
-- Cleaned up the header a bit more
|
-- Cleaned up the header a bit more
|
||||||
|
22
demo.c
22
demo.c
@ -105,10 +105,9 @@ int main(void)
|
|||||||
mp_sub_d(&a, 1, &c);
|
mp_sub_d(&a, 1, &c);
|
||||||
mp_exptmod(&b, &c, &a, &d);
|
mp_exptmod(&b, &c, &a, &d);
|
||||||
mp_toradix(&d, buf, 10);
|
mp_toradix(&d, buf, 10);
|
||||||
printf("b^p-1 == %s\n", buf);
|
printf("b^p-1 == %s\n", buf);
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
|
|
||||||
mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
|
mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
|
||||||
mp_read_radix(&b, "340282366920938463463574607431768211455", 10);
|
mp_read_radix(&b, "340282366920938463463574607431768211455", 10);
|
||||||
while (a.used * DIGIT_BIT < 8192) {
|
while (a.used * DIGIT_BIT < 8192) {
|
||||||
@ -156,9 +155,6 @@ int main(void)
|
|||||||
printf("Multiplying %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((ulong64)100000));
|
printf("Multiplying %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((ulong64)100000));
|
||||||
mp_copy(&b, &a);
|
mp_copy(&b, &a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
char *primes[] = {
|
char *primes[] = {
|
||||||
@ -177,6 +173,7 @@ int main(void)
|
|||||||
for (rr = 0; rr < mp_count_bits(&a); rr++) {
|
for (rr = 0; rr < mp_count_bits(&a); rr++) {
|
||||||
mp_mul_2d(&b, 1, &b);
|
mp_mul_2d(&b, 1, &b);
|
||||||
b.dp[0] |= lbit();
|
b.dp[0] |= lbit();
|
||||||
|
b.used += 1;
|
||||||
}
|
}
|
||||||
mp_sub_d(&a, 1, &c);
|
mp_sub_d(&a, 1, &c);
|
||||||
mp_mod(&b, &c, &b);
|
mp_mod(&b, &c, &b);
|
||||||
@ -198,7 +195,7 @@ int main(void)
|
|||||||
printf("Exponentiating %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((ulong64)35));
|
printf("Exponentiating %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((ulong64)35));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
|
mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
|
||||||
mp_read_radix(&b, "234892374891378913789237289378973232333", 10);
|
mp_read_radix(&b, "234892374891378913789237289378973232333", 10);
|
||||||
while (a.used * DIGIT_BIT < 8192) {
|
while (a.used * DIGIT_BIT < 8192) {
|
||||||
@ -223,6 +220,19 @@ int main(void)
|
|||||||
|
|
||||||
inv_n = expt_n = lcm_n = gcd_n = add_n = sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = 0;
|
inv_n = expt_n = lcm_n = gcd_n = add_n = sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = 0;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
|
||||||
|
/* randomly clear and re-init one variable, this has the affect of triming the alloc space */
|
||||||
|
switch (abs(rand()) % 7) {
|
||||||
|
case 0: mp_clear(&a); mp_init(&a); break;
|
||||||
|
case 1: mp_clear(&b); mp_init(&b); break;
|
||||||
|
case 2: mp_clear(&c); mp_init(&c); break;
|
||||||
|
case 3: mp_clear(&d); mp_init(&d); break;
|
||||||
|
case 4: mp_clear(&e); mp_init(&e); break;
|
||||||
|
case 5: mp_clear(&f); mp_init(&f); break;
|
||||||
|
case 6: break; /* don't clear any */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
printf("%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%5d\r", add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, expt_n, inv_n, _ifuncs);
|
printf("%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%5d\r", add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, expt_n, inv_n, _ifuncs);
|
||||||
fgets(cmd, 4095, stdin);
|
fgets(cmd, 4095, stdin);
|
||||||
cmd[strlen(cmd)-1] = 0;
|
cmd[strlen(cmd)-1] = 0;
|
||||||
|
2
makefile
2
makefile
@ -1,7 +1,7 @@
|
|||||||
CC = gcc
|
CC = gcc
|
||||||
CFLAGS += -DDEBUG -Wall -W -O3 -fomit-frame-pointer -funroll-loops
|
CFLAGS += -DDEBUG -Wall -W -O3 -fomit-frame-pointer -funroll-loops
|
||||||
|
|
||||||
VERSION=0.06
|
VERSION=0.07
|
||||||
|
|
||||||
default: test
|
default: test
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ void rand_num(mp_int *a)
|
|||||||
unsigned char buf[512];
|
unsigned char buf[512];
|
||||||
|
|
||||||
top:
|
top:
|
||||||
size = 1 + ((fgetc(rng)*fgetc(rng)) % 32);
|
size = 1 + ((fgetc(rng)*fgetc(rng)) % 512);
|
||||||
buf[0] = (fgetc(rng)&1)?1:0;
|
buf[0] = (fgetc(rng)&1)?1:0;
|
||||||
fread(buf+1, 1, size, rng);
|
fread(buf+1, 1, size, rng);
|
||||||
for (n = 0; n < size; n++) {
|
for (n = 0; n < size; n++) {
|
||||||
@ -57,7 +57,7 @@ void rand_num2(mp_int *a)
|
|||||||
unsigned char buf[512];
|
unsigned char buf[512];
|
||||||
|
|
||||||
top:
|
top:
|
||||||
size = 1 + ((fgetc(rng)*fgetc(rng)) % 32);
|
size = 1 + ((fgetc(rng)*fgetc(rng)) % 512);
|
||||||
buf[0] = (fgetc(rng)&1)?1:0;
|
buf[0] = (fgetc(rng)&1)?1:0;
|
||||||
fread(buf+1, 1, size, rng);
|
fread(buf+1, 1, size, rng);
|
||||||
for (n = 0; n < size; n++) {
|
for (n = 0; n < size; n++) {
|
||||||
|
Loading…
Reference in New Issue
Block a user