added libtommath-0.10

This commit is contained in:
Tom St Denis 2003-02-28 16:07:32 +00:00 committed by Steffen Jaeckel
parent 40c00add00
commit fb93a30a25
10 changed files with 954 additions and 98 deletions

445
bn.c
View File

@ -796,7 +796,7 @@ int mp_mul_2(mp_int *a, mp_int *b)
DECFUNC();
return res;
}
b->used = b->alloc;
++b->used;
/* shift any bit count < DIGIT_BIT */
r = 0;
@ -1017,7 +1017,6 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
*/
_W = W + ix;
/* the number of digits is limited by their placement. E.g.
we avoid multiplying digits that will end up above the # of
digits of precision requested
@ -2840,11 +2839,404 @@ int mp_reduce(mp_int *x, mp_int *m, mp_int *mu)
return res;
}
/* setups the montgomery reduction stuff */
int mp_montgomery_setup(mp_int *a, mp_digit *mp)
{
mp_int t, tt;
int res;
if ((res = mp_init(&t)) != MP_OKAY) {
return res;
}
if ((res = mp_init(&tt)) != MP_OKAY) {
goto __T;
}
/* tt = b */
tt.dp[0] = 0;
tt.dp[1] = 1;
tt.used = 2;
/* t = m mod b */
t.dp[0] = a->dp[0];
t.used = 1;
/* t = 1/m mod b */
if ((res = mp_invmod(&t, &tt, &t)) != MP_OKAY) {
goto __TT;
}
/* t = -1/m mod b */
*mp = ((mp_digit)1 << ((mp_digit)DIGIT_BIT)) - t.dp[0];
res = MP_OKAY;
__TT: mp_clear(&tt);
__T: mp_clear(&t);
return res;
}
/* computes xR^-1 == x (mod N) via Montgomery Reduction (comba) */
static int fast_mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp)
{
int ix, res, olduse;
mp_digit ui;
mp_word W[512];
/* get old used count */
olduse = a->used;
/* grow a as required */
if (a->alloc < m->used*2+1) {
if ((res = mp_grow(a, m->used*2+1)) != MP_OKAY) {
return res;
}
}
/* copy and clear */
for (ix = 0; ix < a->used; ix++) {
W[ix] = a->dp[ix];
}
for (; ix < m->used * 2 + 1; ix++) {
W[ix] = 0;
}
for (ix = 0; ix < m->used; ix++) {
/* ui = ai * m' mod b */
ui = (((mp_digit)(W[ix] & MP_MASK)) * mp) & MP_MASK;
/* a = a + ui * m * b^i */
{
register int iy;
register mp_digit *tmpx;
register mp_word *_W;
/* aliases */
tmpx = m->dp;
_W = W + ix;
for (iy = 0; iy < m->used; iy++) {
*_W++ += ((mp_word)ui) * ((mp_word)*tmpx++);
}
/* now fix carry for W[ix+1] */
W[ix+1] += W[ix] >> ((mp_word)DIGIT_BIT);
W[ix] &= ((mp_word)MP_MASK);
}
}
/* nox fix rest of carries */
for (; ix <= m->used * 2 + 1; ix++) {
W[ix] += (W[ix-1] >> ((mp_word)DIGIT_BIT));
W[ix-1] &= ((mp_word)MP_MASK);
}
/* copy out */
/* A = A/b^n */
for (ix = 0; ix < m->used + 1; ix++) {
a->dp[ix] = W[ix+m->used];
}
a->used = m->used + 1;
/* zero oldused digits */
for (; ix < olduse; ix++) {
a->dp[ix] = 0;
}
mp_clamp(a);
/* if A >= m then A = A - m */
if (mp_cmp_mag(a, m) != MP_LT) {
if ((res = s_mp_sub(a, m, a)) != MP_OKAY) {
return res;
}
}
return MP_OKAY;
}
/* computes xR^-1 == x (mod N) via Montgomery Reduction */
int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp)
{
int ix, res, digs;
mp_digit ui;
REGFUNC("mp_montgomery_reduce");
VERIFY(a);
VERIFY(m);
digs = m->used * 2 + 1;
if ((digs < 512) && digs < (1<<( (CHAR_BIT*sizeof(mp_word)) - (2*DIGIT_BIT)))) {
res = fast_mp_montgomery_reduce(a, m, mp);
DECFUNC();
return res;
}
if (a->alloc < m->used*2+1) {
if ((res = mp_grow(a, m->used*2+1)) != MP_OKAY) {
DECFUNC();
return res;
}
}
a->used = m->used * 2 + 1;
for (ix = 0; ix < m->used; ix++) {
/* ui = ai * m' mod b */
ui = (a->dp[ix] * mp) & MP_MASK;
/* a = a + ui * m * b^i */
{
register int iy;
register mp_digit *tmpx, *tmpy, mu;
register mp_word r;
/* aliases */
tmpx = m->dp;
tmpy = a->dp + ix;
mu = 0;
for (iy = 0; iy < m->used; iy++) {
r = ((mp_word)ui) * ((mp_word)*tmpx++) + ((mp_word)mu) + ((mp_word)*tmpy);
mu = (r >> ((mp_word)DIGIT_BIT));
*tmpy++ = (r & ((mp_word)MP_MASK));
}
/* propagate carries */
while (mu) {
*tmpy += mu;
mu = (*tmpy>>DIGIT_BIT)&1;
*tmpy++ &= MP_MASK;
}
}
}
/* A = A/b^n */
mp_rshd(a, m->used);
/* if A >= m then A = A - m */
if (mp_cmp_mag(a, m) != MP_LT) {
if ((res = s_mp_sub(a, m, a)) != MP_OKAY) {
DECFUNC();
return res;
}
}
DECFUNC();
return MP_OKAY;
}
/* computes Y == G^X mod P, HAC pp.616, Algorithm 14.85
*
* Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
* The value of k changes based on the size of the exponent.
*
* Uses Montgomery reduction
*/
static int mp_exptmod_fast(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
{
mp_int M[64], res;
mp_digit buf, mp;
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
REGFUNC("mp_exptmod_fast");
VERIFY(G);
VERIFY(X);
VERIFY(P);
VERIFY(Y);
/* find window size */
x = mp_count_bits(X);
if (x <= 18) { winsize = 2; }
else if (x <= 84) { winsize = 3; }
else if (x <= 300) { winsize = 4; }
else if (x <= 930) { winsize = 5; }
else { winsize = 6; }
/* init G array */
for (x = 0; x < (1<<winsize); x++) {
if ((err = mp_init_size(&M[x], 1)) != MP_OKAY) {
for (y = 0; y < x; y++) {
mp_clear(&M[y]);
}
DECFUNC();
return err;
}
}
/* now setup montgomery */
if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
goto __M;
}
/* setup result */
if ((err = mp_init(&res)) != MP_OKAY) {
goto __M;
}
/* now we need R mod m */
mp_set(&res, 1);
if ((err = mp_lshd(&res, P->used)) != MP_OKAY) {
goto __RES;
}
/* res = R mod m */
if ((err = mp_mod(&res, P, &res)) != MP_OKAY) {
goto __RES;
}
/* create M table
*
* The M table contains powers of the input base, e.g. M[x] = G^x mod P
*
* The first half of the table is not computed though accept for M[0] and M[1]
*/
mp_set(&M[0], 1);
if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
goto __RES;
}
/* now set M[1] to G * R mod m */
if ((err = mp_mulmod(&M[1], &res, P, &M[1])) != MP_OKAY) {
goto __RES;
}
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
if ((err = mp_copy(&M[1], &M[1<<(winsize-1)])) != MP_OKAY) {
goto __RES;
}
for (x = 0; x < (winsize-1); x++) {
if ((err = mp_sqr(&M[1<<(winsize-1)], &M[1<<(winsize-1)])) != MP_OKAY) {
goto __RES;
}
if ((err = mp_montgomery_reduce(&M[1<<(winsize-1)], P, mp)) != MP_OKAY) {
goto __RES;
}
}
/* create upper table */
for (x = (1<<(winsize-1))+1; x < (1 << winsize); x++) {
if ((err = mp_mul(&M[x-1], &M[1], &M[x])) != MP_OKAY) {
goto __RES;
}
if ((err = mp_montgomery_reduce(&M[x], P, mp)) != MP_OKAY) {
goto __RES;
}
}
/* set initial mode and bit cnt */
mode = 0;
bitcnt = 0;
buf = 0;
digidx = X->used - 1;
bitcpy = bitbuf = 0;
bitcnt = 1;
for (;;) {
/* grab next digit as required */
if (--bitcnt == 0) {
if (digidx == -1) {
break;
}
buf = X->dp[digidx--];
bitcnt = (int)DIGIT_BIT;
}
/* grab the next msb from the exponent */
y = (buf >> (DIGIT_BIT - 1)) & 1;
buf <<= 1;
/* if the bit is zero and mode == 0 then we ignore it
* These represent the leading zero bits before the first 1 bit
* in the exponent. Technically this opt is not required but it
* does lower the # of trivial squaring/reductions used
*/
if (mode == 0 && y == 0) continue;
/* if the bit is zero and mode == 1 then we square */
if (y == 0 && mode == 1) {
if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_montgomery_reduce(&res, P, mp)) != MP_OKAY) {
goto __RES;
}
continue;
}
/* else we add it to the window */
bitbuf |= (y<<(winsize-++bitcpy));
mode = 2;
if (bitcpy == winsize) {
/* ok window is filled so square as required and multiply multiply */
/* square first */
for (x = 0; x < winsize; x++) {
if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_montgomery_reduce(&res, P, mp)) != MP_OKAY) {
goto __RES;
}
}
/* then multiply */
if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_montgomery_reduce(&res, P, mp)) != MP_OKAY) {
goto __RES;
}
/* empty window and reset */
bitcpy = bitbuf = 0;
mode = 1;
}
}
/* if bits remain then square/multiply */
if (mode == 2 && bitcpy > 0) {
/* square then multiply if the bit is set */
for (x = 0; x < bitcpy; x++) {
if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_montgomery_reduce(&res, P, mp)) != MP_OKAY) {
goto __RES;
}
bitbuf <<= 1;
if ((bitbuf & (1<<winsize)) != 0) {
/* then multiply */
if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_montgomery_reduce(&res, P, mp)) != MP_OKAY) {
goto __RES;
}
}
}
}
/* fixup result */
if ((err = mp_montgomery_reduce(&res, P, mp)) != MP_OKAY) {
goto __RES;
}
mp_exch(&res, Y);
err = MP_OKAY;
__RES: mp_clear(&res);
__M :
for (x = 0; x < (1<<winsize); x++) {
mp_clear(&M[x]);
}
DECFUNC();
return err;
}
int mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
{
mp_int M[64], res, mu;
@ -2857,6 +3249,13 @@ int mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
VERIFY(P);
VERIFY(Y);
/* if the modulus is odd use the fast method */
if (mp_isodd(P) == 1 && P->used > 4 && P->used < MONTGOMERY_EXPT_CUTOFF) {
err = mp_exptmod_fast(G, X, P, Y);
DECFUNC();
return err;
}
/* find window size */
x = mp_count_bits(X);
if (x <= 18) { winsize = 2; }
@ -2918,7 +3317,8 @@ int mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
goto __MU;
}
}
/* init result */
/* setup result */
if ((err = mp_init(&res)) != MP_OKAY) {
goto __MU;
}
@ -3009,10 +3409,10 @@ int mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
if ((bitbuf & (1<<winsize)) != 0) {
/* then multiply */
if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) {
goto __MU;
goto __RES;
}
if ((err = mp_reduce(&res, P, &mu)) != MP_OKAY) {
goto __MU;
goto __RES;
}
}
}
@ -3060,10 +3460,8 @@ int mp_n_root(mp_int *a, mp_digit b, mp_int *c)
neg = a->sign;
a->sign = MP_ZPOS;
/* t2 = a */
if ((res = mp_copy(a, &t2)) != MP_OKAY) {
goto __T3;
}
/* t2 = 2 */
mp_set(&t2, 2);
do {
/* t1 = t2 */
@ -3098,15 +3496,19 @@ int mp_n_root(mp_int *a, mp_digit b, mp_int *c)
}
} while (mp_cmp(&t1, &t2) != MP_EQ);
/* result can be at most off by one so check */
if ((res = mp_expt_d(&t1, b, &t2)) != MP_OKAY) {
goto __T3;
}
if (mp_cmp(&t2, a) == MP_GT) {
if ((res = mp_sub_d(&t1, 1, &t1)) != MP_OKAY) {
/* result can be off by a few so check */
for (;;) {
if ((res = mp_expt_d(&t1, b, &t2)) != MP_OKAY) {
goto __T3;
}
if (mp_cmp(&t2, a) == MP_GT) {
if ((res = mp_sub_d(&t1, 1, &t1)) != MP_OKAY) {
goto __T3;
}
} else {
break;
}
}
/* reset the sign of a first */
@ -3336,13 +3738,14 @@ int mp_to_signed_bin(mp_int *a, unsigned char *b)
/* get the size for an unsigned equivalent */
int mp_unsigned_bin_size(mp_int *a)
{
return (mp_count_bits(a)/8 + ((mp_count_bits(a)&7) != 0 ? 1 : 0));
int size = mp_count_bits(a);
return (size/8 + ((size&7) != 0 ? 1 : 0));
}
/* get the size for an signed equivalent */
int mp_signed_bin_size(mp_int *a)
{
return 1 + (mp_count_bits(a)/8 + ((mp_count_bits(a)&7) != 0 ? 1 : 0));
return 1 + mp_unsigned_bin_size(a);
}
/* read a string [ASCII] in a given radix */
@ -3431,7 +3834,10 @@ int mp_radix_size(mp_int *a, int radix)
mp_int t;
mp_digit d;
digs = 0;
/* special case for binary */
if (radix == 2) {
return mp_count_bits(a) + (a->sign == MP_NEG ? 1 : 0) + 1;
}
if (radix < 2 || radix > 64) {
return 0;
@ -3441,6 +3847,7 @@ int mp_radix_size(mp_int *a, int radix)
return 0;
}
digs = 0;
if (t.sign == MP_NEG) {
++digs;
t.sign = MP_ZPOS;

9
bn.h
View File

@ -84,6 +84,7 @@ typedef int mp_err;
/* you'll have to tune these... */
#define KARATSUBA_MUL_CUTOFF 80 /* Min. number of digits before Karatsuba multiplication is used. */
#define KARATSUBA_SQR_CUTOFF 80 /* Min. number of digits before Karatsuba squaring is used. */
#define MONTGOMERY_EXPT_CUTOFF 40 /* max. number of digits that montgomery reductions will help for */
#define MP_PREC 64 /* default digits of precision */
@ -114,7 +115,7 @@ int mp_shrink(mp_int *a);
#define mp_iszero(a) (((a)->used == 0) ? 1 : 0)
#define mp_iseven(a) (((a)->used == 0 || (((a)->dp[0] & 1) == 0)) ? 1 : 0)
#define mp_isodd(a) (((a)->used > 0 || (((a)->dp[0] & 1) == 1)) ? 1 : 0)
#define mp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? 1 : 0)
/* set to zero */
void mp_zero(mp_int *a);
@ -257,6 +258,12 @@ int mp_reduce_setup(mp_int *a, mp_int *b);
*/
int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
/* setups the montgomery reduction */
int mp_montgomery_setup(mp_int *a, mp_digit *mp);
/* computes xR^-1 == x (mod N) via Montgomery Reduction */
int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
/* d = a^b (mod c) */
int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);

BIN
bn.pdf

Binary file not shown.

59
bn.tex
View File

@ -1,7 +1,7 @@
\documentclass{article}
\begin{document}
\title{LibTomMath v0.09 \\ A Free Multiple Precision Integer Library}
\title{LibTomMath v0.10 \\ A Free Multiple Precision Integer Library}
\author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle
\newpage
@ -279,6 +279,22 @@ int mp_n_root(mp_int *a, mp_digit b, mp_int *c);
/* computes the jacobi c = (a | n) (or Legendre if b is prime) */
int mp_jacobi(mp_int *a, mp_int *n, int *c);
/* used to setup the Barrett reduction for a given modulus b */
int mp_reduce_setup(mp_int *a, mp_int *b);
/* Barrett Reduction, computes a (mod b) with a precomputed value c
*
* Assumes that 0 < a <= b^2, note if 0 > a > -(b^2) then you can merely
* compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code].
*/
int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
/* setups the montgomery reduction */
int mp_montgomery_setup(mp_int *a, mp_digit *mp);
/* computes xR^-1 == x (mod N) via Montgomery Reduction */
int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
/* d = a^b (mod c) */
int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
\end{verbatim}
@ -456,16 +472,33 @@ it is not.
\subsubsection{mp\_exptmod(mp\_int *a, mp\_int *b, mp\_int *c, mp\_int *d)}
Computes $d = a^b \mbox{ (mod }c\mbox{)}$ using a sliding window $k$-ary exponentiation algorithm. For an $\alpha$-bit
exponent it performs $\alpha$ squarings and at most $\lfloor \alpha/k \rfloor + k$ multiplications. The value of $k$ is
chosen to minimize the number of multiplications required for a given value of $\alpha$. Barrett reductions are used
to reduce the squared or multiplied temporary results modulo $c$. A Barrett reduction requires one division that is
performed only and two half multipliers of $N$ digit numbers resulting in approximation $O((N^2)/2)$ work.
chosen to minimize the number of multiplications required for a given value of $\alpha$. Barrett or Montgomery
reductions are used to reduce the squared or multiplied temporary results modulo $c$.
Let $\gamma = \lfloor \alpha/k \rfloor + k$ represent the total multiplications. The total work of a exponentiation is
therefore, $O(3 \cdot N^2 + (\alpha + \gamma) \cdot ((N^2)/2) + \alpha \cdot ((N^2 + N)/2) + \gamma \cdot N^2)$ which
simplies to $O(3 \cdot N^2 + \gamma N^2 + \alpha (N^2 + (N/2)))$. The bulk of the time is spent in the Barrett
reductions and the squaring algorithms. Since $\gamma < \alpha$ it makes sense to optimize first the Barrett and
squaring routines first. Significant improvements in the future will most likely stem from optimizing these instead
of optimizing the multipliers.
\subsection{Fast Modular Reductions}
\subsubsection{mp\_reduce(mp\_int *a, mp\_int *b, mp\_int *c)}
Computes a Barrett reduction in-place of $a$ modulo $b$ with respect to $c$. In essence it computes
$a \equiv a \mbox{ (mod }b\mbox{)}$ provided $0 \le a \le b^2$. The value of $c$ is precomputed with the
function mp\_reduce\_setup().
The Barrett reduction function has been optimized to use partial multipliers which means compared to MPI it performs
have the number of single precision multipliers (\textit{provided they have the same size digits}). The partial
multipliers (\textit{one of which is shared with mp\_mul}) both have baseline and comba variants. Barrett reduction
can reduce a number modulo a $n-$digit modulus with approximately $2n^2$ single precision multiplications.
\subsubsection{mp\_montgomery\_reduce(mp\_int *a, mp\_int *m, mp\_digit mp)}
Computes a Montgomery reduction in-place of $a$ modulo $b$ with respect to $mp$. If $b$ is some $n-$digit modulus then
$R = \beta^{n+1}$. The result of this function is $aR^{-1} \mbox{ (mod }b\mbox{)}$ provided that $0 \le a \le b^2$.
The value of $mp$ is precomputed with the function mp\_montgomery\_setup().
The Montgomery reduction comes in two variants. A standard baseline and a fast comba method. The baseline routine
is in fact slower than the Barrett reductions, however, the comba routine is much faster. Montomgery reduction can
reduce a number modulo a $n-$digit modulus with approximately $n^2 + n$ single precision multiplications.
Note that the final result of a Montgomery reduction is not just the value reduced modulo $b$. You have to multiply
by $R$ modulo $b$ to get the real result. At first that may not seem like such a worthwhile routine, however, the
exptmod function can be made to take advantage of this such that only one normalization at the end is required.
\section{Timing Analysis}
\subsection{Observed Timings}
@ -503,9 +536,9 @@ Square & 1024 & 18,991 & 5,733 \\
Square & 2048 & 72,126 & 17,621 \\
Square & 4096 & 306,269 & 67,576 \\
\hline
Exptmod & 512 & 32,021,586 & 4,138,354 \\
Exptmod & 768 & 97,595,492 & 9,840,233 \\
Exptmod & 1024 & 223,302,532 & 20,624,553 \\
Exptmod & 512 & 32,021,586 & 3,118,435 \\
Exptmod & 768 & 97,595,492 & 8,493,633 \\
Exptmod & 1024 & 223,302,532 & 17,715,899 \\
Exptmod & 2048 & 1,682,223,369 & 114,936,361 \\
Exptmod & 2560 & 3,268,615,571 & 229,402,426 \\
Exptmod & 3072 & 5,597,240,141 & 367,403,840 \\

View File

@ -1,3 +1,8 @@
Jan 9th, 2003
v0.10 -- Pekka Riikonen suggested fixes to the radix conversion code.
-- Added baseline montgomery and comba montgomery reductions, sped up exptmods
[to a point, see bn.h for MONTGOMERY_EXPT_CUTOFF]
Jan 6th, 2003
v0.09 -- Updated the manual to reflect recent changes. :-)
-- Added Jacobi function (mp_jacobi) to supplement the number theory side of the lib

41
demo.c
View File

@ -21,12 +21,15 @@
#define TIMER
extern ulong64 rdtsc(void);
extern void reset(void);
#else
#endif
#ifdef TIMER
#ifndef TIMER_X86
ulong64 _tt;
void reset(void) { _tt = clock(); }
ulong64 rdtsc(void) { return clock() - _tt; }
#endif
#endif
#ifndef DEBUG
int _ifuncs;
@ -82,6 +85,7 @@ int main(void)
mp_int a, b, c, d, e, f;
unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n;
int rr;
mp_digit tom;
#ifdef TIMER
int n;
@ -95,6 +99,28 @@ int main(void)
mp_init(&e);
mp_init(&f);
mp_read_radix(&a, "59994534535345535344389423", 10);
mp_read_radix(&b, "49993453555234234565675534", 10);
mp_read_radix(&c, "62398923474472948723847281", 10);
mp_mulmod(&a, &b, &c, &f);
/* setup mont */
mp_montgomery_setup(&c, &tom);
mp_mul(&a, &b, &a);
mp_montgomery_reduce(&a, &c, tom);
mp_montgomery_reduce(&a, &c, tom);
mp_lshd(&a, c.used*2);
mp_mod(&a, &c, &a);
mp_toradix(&a, cmd, 10);
printf("%s\n\n", cmd);
mp_toradix(&f, cmd, 10);
printf("%s\n", cmd);
/* return 0; */
mp_read_radix(&a, "V//////////////////////////////////////////////////////////////////////////////////////", 64);
mp_reduce_setup(&b, &a);
printf("\n\n----\n\n");
@ -103,20 +129,7 @@ int main(void)
mp_read_radix(&b, "4982748972349724892742", 10);
mp_sub_d(&a, 1, &c);
#ifdef DEBUG
mp_sqr(&a, &a);mp_sqr(&c, &c);
mp_sqr(&a, &a);mp_sqr(&c, &c);
mp_sqr(&a, &a);mp_sqr(&c, &c);
reset_timings();
#endif
mp_exptmod(&b, &c, &a, &d);
#ifdef DEBUG
dump_timings();
return 0;
#endif
mp_toradix(&d, buf, 10);
printf("b^p-1 == %s\n", buf);

View File

@ -85,10 +85,11 @@ static mp_digit prime_digit()
}
/* makes a prime of at least k bits */
int pprime(int k, mp_int *p, mp_int *q)
int pprime(int k, int li, mp_int *p, mp_int *q)
{
mp_int a, b, c, n, x, y, z, v;
int res;
int res, ii;
static const mp_digit bases[] = { 2, 3, 5, 7, 11, 13, 17, 19 };
/* single digit ? */
if (k <= (int)DIGIT_BIT) {
@ -167,55 +168,60 @@ int pprime(int k, mp_int *p, mp_int *q)
if (mp_cmp_d(&y, 1) != MP_EQ) goto top;
/* now try base x=2 */
mp_set(&x, 2);
/* now try base x=bases[ii] */
for (ii = 0; ii < li; ii++) {
mp_set(&x, bases[ii]);
/* compute x^a mod n */
if ((res = mp_exptmod(&x, &a, &n, &y)) != MP_OKAY) { /* y = x^a mod n */
goto __Z;
/* compute x^a mod n */
if ((res = mp_exptmod(&x, &a, &n, &y)) != MP_OKAY) { /* y = x^a mod n */
goto __Z;
}
/* if y == 1 loop */
if (mp_cmp_d(&y, 1) == MP_EQ) continue;
/* now x^2a mod n */
if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2a mod n */
goto __Z;
}
if (mp_cmp_d(&y, 1) == MP_EQ) continue;
/* compute x^b mod n */
if ((res = mp_exptmod(&x, &b, &n, &y)) != MP_OKAY) { /* y = x^b mod n */
goto __Z;
}
/* if y == 1 loop */
if (mp_cmp_d(&y, 1) == MP_EQ) continue;
/* now x^2b mod n */
if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2b mod n */
goto __Z;
}
if (mp_cmp_d(&y, 1) == MP_EQ) continue;
/* compute x^c mod n == x^ab mod n */
if ((res = mp_exptmod(&x, &c, &n, &y)) != MP_OKAY) { /* y = x^ab mod n */
goto __Z;
}
/* if y == 1 loop */
if (mp_cmp_d(&y, 1) == MP_EQ) continue;
/* now compute (x^c mod n)^2 */
if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2ab mod n */
goto __Z;
}
/* y should be 1 */
if (mp_cmp_d(&y, 1) != MP_EQ) continue;
break;
}
/* if y == 1 loop */
if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
/* now x^2a mod n */
if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2a mod n */
goto __Z;
}
if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
/* compute x^b mod n */
if ((res = mp_exptmod(&x, &b, &n, &y)) != MP_OKAY) { /* y = x^b mod n */
goto __Z;
}
/* if y == 1 loop */
if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
/* now x^2b mod n */
if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2b mod n */
goto __Z;
}
if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
/* compute x^c mod n == x^ab mod n */
if ((res = mp_exptmod(&x, &c, &n, &y)) != MP_OKAY) { /* y = x^ab mod n */
goto __Z;
}
/* if y == 1 loop */
if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
/* now compute (x^c mod n)^2 */
if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2ab mod n */
goto __Z;
}
/* y should be 1 */
if (mp_cmp_d(&y, 1) != MP_EQ) goto top;
/* no bases worked? */
if (ii == li) goto top;
/*
{
@ -234,8 +240,12 @@ int pprime(int k, mp_int *p, mp_int *q)
mp_copy(&n, &a);
}
/* get q to be the order of the large prime subgroup */
mp_sub_d(&n, 1, q);
mp_div_2(q, q);
mp_div(q, &b, q, NULL);
mp_exch(&n, p);
mp_exch(&b, q);
res = MP_OKAY;
__Z: mp_clear(&z);
@ -254,19 +264,25 @@ int main(void)
{
mp_int p, q;
char buf[4096];
int k;
int k, li;
clock_t t1;
srand(time(NULL));
printf("Enter # of bits: \n");
scanf("%d", &k);
fgets(buf, sizeof(buf), stdin);
sscanf(buf, "%d", &k);
printf("Enter number of bases to try (1 to 8):\n");
fgets(buf, sizeof(buf), stdin);
sscanf(buf, "%d", &li);
mp_init(&p);
mp_init(&q);
t1 = clock();
pprime(k, &p, &q);
pprime(k, li, &p, &q);
t1 = clock() - t1;
printf("\n\nTook %ld ticks, %d bits\n", t1, mp_count_bits(&p));

View File

@ -1,7 +1,7 @@
CC = gcc
CFLAGS += -Wall -W -Wshadow -ansi -O3 -fomit-frame-pointer -funroll-loops
VERSION=0.09
VERSION=0.10
default: test

302
poly.c Normal file
View File

@ -0,0 +1,302 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* This file "poly.c" provides GF(p^k) functionality on top of the
* libtommath library.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://libtommath.iahu.ca
*/
#include "poly.h"
#undef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#undef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
static void s_free(mp_poly *a)
{
int k;
for (k = 0; k < a->alloc; k++) {
mp_clear(&(a->co[k]));
}
}
static int s_setup(mp_poly *a, int low, int high)
{
int res, k, j;
for (k = low; k < high; k++) {
if ((res = mp_init(&(a->co[k]))) != MP_OKAY) {
for (j = low; j < k; j++) {
mp_clear(&(a->co[j]));
}
return MP_MEM;
}
}
return MP_OKAY;
}
int mp_poly_init(mp_poly *a, mp_int *cha)
{
return mp_poly_init_size(a, cha, MP_POLY_PREC);
}
/* init a poly of a given (size) degree */
int mp_poly_init_size(mp_poly *a, mp_int *cha, int size)
{
int res;
/* allocate array of mp_ints for coefficients */
a->co = malloc(size * sizeof(mp_int));
if (a->co == NULL) {
return MP_MEM;
}
a->used = 0;
a->alloc = size;
/* now init the range */
if ((res = s_setup(a, 0, size)) != MP_OKAY) {
free(a->co);
return res;
}
/* copy characteristic */
if ((res = mp_init_copy(&(a->cha), cha)) != MP_OKAY) {
s_free(a);
free(a->co);
return res;
}
/* return ok at this point */
return MP_OKAY;
}
/* grow the size of a poly */
static int mp_poly_grow(mp_poly *a, int size)
{
int res;
if (size > a->alloc) {
/* resize the array of coefficients */
a->co = realloc(a->co, sizeof(mp_int) * size);
if (a->co == NULL) {
return MP_MEM;
}
/* now setup the coefficients */
if ((res = s_setup(a, a->alloc, a->alloc + size)) != MP_OKAY) {
return res;
}
a->alloc += size;
}
return MP_OKAY;
}
/* copy, b = a */
int mp_poly_copy(mp_poly *a, mp_poly *b)
{
int res, k;
/* resize b */
if ((res = mp_poly_grow(b, a->used)) != MP_OKAY) {
return res;
}
/* now copy the used part */
b->used = a->used;
/* now the cha */
if ((res = mp_copy(&(a->cha), &(b->cha))) != MP_OKAY) {
return res;
}
/* now all the coefficients */
for (k = 0; k < b->used; k++) {
if ((res = mp_copy(&(a->co[k]), &(b->co[k]))) != MP_OKAY) {
return res;
}
}
/* now zero the top */
for (k = b->used; k < b->alloc; k++) {
mp_zero(&(b->co[k]));
}
return MP_OKAY;
}
/* init from a copy, a = b */
int mp_poly_init_copy(mp_poly *a, mp_poly *b)
{
int res;
if ((res = mp_poly_init(a, &(b->cha))) != MP_OKAY) {
return res;
}
return mp_poly_copy(b, a);
}
/* free a poly from ram */
void mp_poly_clear(mp_poly *a)
{
s_free(a);
mp_clear(&(a->cha));
free(a->co);
a->co = NULL;
a->used = a->alloc = 0;
}
/* exchange two polys */
void mp_poly_exch(mp_poly *a, mp_poly *b)
{
mp_poly t;
t = *a; *a = *b; *b = t;
}
/* clamp the # of used digits */
static void mp_poly_clamp(mp_poly *a)
{
while (a->used > 0 && mp_cmp_d(&(a->co[a->used]), 0) == MP_EQ) --(a->used);
}
/* add two polynomials, c(x) = a(x) + b(x) */
int mp_poly_add(mp_poly *a, mp_poly *b, mp_poly *c)
{
mp_poly t, *x, *y;
int res, k;
/* ensure char's are the same */
if (mp_cmp(&(a->cha), &(b->cha)) != MP_EQ) {
return MP_VAL;
}
/* now figure out the sizes such that x is the
largest degree poly and y is less or equal in degree
*/
if (a->used > b->used) {
x = a;
y = b;
} else {
x = b;
y = a;
}
/* now init the result to be a copy of the largest */
if ((res = mp_poly_init_copy(&t, x)) != MP_OKAY) {
return res;
}
/* now add the coeffcients of the smaller one */
for (k = 0; k < y->used; k++) {
if ((res = mp_addmod(&(a->co[k]), &(b->co[k]), &(a->cha), &(t.co[k]))) != MP_OKAY) {
goto __T;
}
}
mp_poly_clamp(&t);
mp_poly_exch(&t, c);
res = MP_OKAY;
__T: mp_poly_clear(&t);
return res;
}
/* subtracts two polynomials, c(x) = a(x) - b(x) */
int mp_poly_sub(mp_poly *a, mp_poly *b, mp_poly *c)
{
mp_poly t, *x, *y;
int res, k;
/* ensure char's are the same */
if (mp_cmp(&(a->cha), &(b->cha)) != MP_EQ) {
return MP_VAL;
}
/* now figure out the sizes such that x is the
largest degree poly and y is less or equal in degree
*/
if (a->used > b->used) {
x = a;
y = b;
} else {
x = b;
y = a;
}
/* now init the result to be a copy of the largest */
if ((res = mp_poly_init_copy(&t, x)) != MP_OKAY) {
return res;
}
/* now add the coeffcients of the smaller one */
for (k = 0; k < y->used; k++) {
if ((res = mp_submod(&(a->co[k]), &(b->co[k]), &(a->cha), &(t.co[k]))) != MP_OKAY) {
goto __T;
}
}
mp_poly_clamp(&t);
mp_poly_exch(&t, c);
res = MP_OKAY;
__T: mp_poly_clear(&t);
return res;
}
/* multiplies two polynomials, c(x) = a(x) * b(x) */
int mp_poly_mul(mp_poly *a, mp_poly *b, mp_poly *c)
{
mp_poly t;
mp_int tt;
int res, pa, pb, ix, iy;
/* ensure char's are the same */
if (mp_cmp(&(a->cha), &(b->cha)) != MP_EQ) {
return MP_VAL;
}
/* degrees of a and b */
pa = a->used;
pb = b->used;
/* now init the temp polynomial to be of degree pa+pb */
if ((res = mp_poly_init_size(&t, &(a->cha), pa+pb)) != MP_OKAY) {
return res;
}
/* now init our temp int */
if ((res = mp_init(&tt)) != MP_OKAY) {
goto __T;
}
/* now loop through all the digits */
for (ix = 0; ix < pa; ix++) {
for (iy = 0; iy < pb; iy++) {
if ((res = mp_mul(&(a->co[ix]), &(b->co[iy]), &tt)) != MP_OKAY) {
goto __TT;
}
if ((res = mp_addmod(&tt, &(t.co[ix+iy]), &(a->cha), &(t.co[ix+iy]))) != MP_OKAY) {
goto __TT;
}
}
}
mp_poly_clamp(&t);
mp_poly_exch(&t, c);
res = MP_OKAY;
__TT: mp_clear(&tt);
__T: mp_poly_clear(&t);
return res;
}

73
poly.h Normal file
View File

@ -0,0 +1,73 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* This file "poly.h" provides GF(p^k) functionality on top of the
* libtommath library.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://libtommath.iahu.ca
*/
#ifndef POLY_H_
#define POLY_H_
#include "bn.h"
/* a mp_poly is basically a derived "class" of a mp_int
* it uses the same technique of growing arrays via
* used/alloc parameters except the base unit or "digit"
* is in fact a mp_int. These hold the coefficients
* of the polynomial
*/
typedef struct {
int used, /* coefficients used */
alloc; /* coefficients allocated (and initialized) */
mp_int *co, /* coefficients */
cha; /* characteristic */
} mp_poly;
#define MP_POLY_PREC 16 /* default coefficients allocated */
/* init a poly */
int mp_poly_init(mp_poly *a, mp_int *cha);
/* init a poly of a given (size) degree */
int mp_poly_init_size(mp_poly *a, mp_int *cha, int size);
/* copy, b = a */
int mp_poly_copy(mp_poly *a, mp_poly *b);
/* init from a copy, a = b */
int mp_poly_init_copy(mp_poly *a, mp_poly *b);
/* free a poly from ram */
void mp_poly_clear(mp_poly *a);
/* exchange two polys */
void mp_poly_exch(mp_poly *a, mp_poly *b);
/* ---> Basic Arithmetic <--- */
/* add two polynomials, c(x) = a(x) + b(x) */
int mp_poly_add(mp_poly *a, mp_poly *b, mp_poly *c);
/* subtracts two polynomials, c(x) = a(x) - b(x) */
int mp_poly_sub(mp_poly *a, mp_poly *b, mp_poly *c);
/* multiplies two polynomials, c(x) = a(x) * b(x) */
int mp_poly_mul(mp_poly *a, mp_poly *b, mp_poly *c);
#endif