added libtommath-0.08
This commit is contained in:
parent
3cd7000342
commit
2cfbb89142
391
bn.c
391
bn.c
@ -25,17 +25,116 @@ static const char *s_rmap =
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
static char *_funcs[1000];
|
||||
int _ifuncs;
|
||||
/* timing data */
|
||||
#ifdef TIMER_X86
|
||||
extern ulong64 gettsc(void);
|
||||
#else
|
||||
ulong64 gettsc(void) { return clock(); }
|
||||
#endif
|
||||
|
||||
#define REGFUNC(name) { if (_ifuncs == 999) { printf("TROUBLE\n"); exit(0); } _funcs[_ifuncs++] = name; }
|
||||
#define DECFUNC() --_ifuncs;
|
||||
/* structure to hold timing data */
|
||||
struct {
|
||||
char *func;
|
||||
ulong64 start, end, tot;
|
||||
} timings[1000000];
|
||||
|
||||
/* structure to hold consolidated timing data */
|
||||
struct _functime {
|
||||
char *func;
|
||||
ulong64 tot;
|
||||
} functime[1000];
|
||||
|
||||
static char *_funcs[1000];
|
||||
int _ifuncs, _itims;
|
||||
|
||||
#define REGFUNC(name) int __IX = _itims++; _funcs[_ifuncs++] = name; timings[__IX].func = name; timings[__IX].start = gettsc();
|
||||
#define DECFUNC() timings[__IX].end = gettsc(); --_ifuncs;
|
||||
#define VERIFY(val) _verify(val, #val, __LINE__);
|
||||
|
||||
/* sort the consolidated timings */
|
||||
int qsort_helper(const void *A, const void *B)
|
||||
{
|
||||
struct _functime *a, *b;
|
||||
|
||||
a = (struct _functime *)A;
|
||||
b = (struct _functime *)B;
|
||||
|
||||
if (a->tot > b->tot) return -1;
|
||||
if (a->tot < b->tot) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* reset debugging information */
|
||||
void reset_timings(void)
|
||||
{
|
||||
_ifuncs = _itims = 0;
|
||||
}
|
||||
|
||||
/* dump the timing data */
|
||||
void dump_timings(void)
|
||||
{
|
||||
int x, y;
|
||||
ulong64 total;
|
||||
|
||||
/* first for every find the total time */
|
||||
printf("Phase I ... Finding totals (%d samples)...\n", _itims);
|
||||
for (x = 0; x < _itims; x++) {
|
||||
timings[x].tot = timings[x].end - timings[x].start;
|
||||
}
|
||||
|
||||
/* now subtract the time for each function where nested functions occured */
|
||||
printf("Phase II ... Finding dependencies...\n");
|
||||
for (x = 0; x < _itims-1; x++) {
|
||||
for (y = x+1; y < _itims && timings[y].start <= timings[x].end; y++) {
|
||||
timings[x].tot -= timings[y].tot;
|
||||
if (timings[x].tot > ((ulong64)1 << (ulong64)40)) {
|
||||
timings[x].tot = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* now consolidate all the entries */
|
||||
printf("Phase III... Consolidation...\n");
|
||||
|
||||
memset(&functime, 0, sizeof(functime));
|
||||
total = 0;
|
||||
for (x = 0; x < _itims; x++) {
|
||||
total += timings[x].tot;
|
||||
|
||||
/* try to find this entry */
|
||||
for (y = 0; functime[y].func != NULL; y++) {
|
||||
if (strcmp(timings[x].func, functime[y].func) == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (functime[y].func == NULL) {
|
||||
/* new entry */
|
||||
functime[y].func = timings[x].func;
|
||||
functime[y].tot = timings[x].tot;
|
||||
} else {
|
||||
functime[y].tot += timings[x].tot;
|
||||
}
|
||||
}
|
||||
|
||||
for (x = 0; functime[x].func != NULL; x++);
|
||||
|
||||
/* sort and dump */
|
||||
qsort(&functime, x, sizeof(functime[0]), &qsort_helper);
|
||||
|
||||
for (x = 0; functime[x].func != NULL; x++) {
|
||||
if (functime[x].tot > 0 && strcmp(functime[x].func, "_verify") != 0) {
|
||||
printf("%30s: %20llu (%3llu.%03llu %%)\n", functime[x].func, functime[x].tot, (functime[x].tot * (ulong64)100) / total, ((functime[x].tot * (ulong64)100000) / total) % (ulong64)1000);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void _verify(mp_int *a, char *name, int line)
|
||||
{
|
||||
int n, y;
|
||||
static const char *err[] = { "Null DP", "alloc < used", "digits above used" };
|
||||
|
||||
REGFUNC("_verify");
|
||||
|
||||
/* dp null ? */
|
||||
y = 0;
|
||||
@ -52,6 +151,7 @@ static void _verify(mp_int *a, char *name, int line)
|
||||
}
|
||||
|
||||
/* ok */
|
||||
DECFUNC();
|
||||
return;
|
||||
error:
|
||||
printf("Error (%s) with variable {%s} on line %d\n", err[y], name, line);
|
||||
@ -64,7 +164,7 @@ error:
|
||||
exit(0);
|
||||
}
|
||||
|
||||
#else
|
||||
#else /* don't use DEBUG stuff so these macros are blank */
|
||||
|
||||
#define REGFUNC(name)
|
||||
#define DECFUNC()
|
||||
@ -76,13 +176,18 @@ error:
|
||||
int mp_init(mp_int *a)
|
||||
{
|
||||
REGFUNC("mp_init");
|
||||
a->dp = calloc(sizeof(mp_digit), 16);
|
||||
|
||||
/* allocate ram required and clear it */
|
||||
a->dp = calloc(sizeof(mp_digit), MP_PREC);
|
||||
if (a->dp == NULL) {
|
||||
DECFUNC();
|
||||
return MP_MEM;
|
||||
}
|
||||
|
||||
/* set the used to zero, allocated digit to the default precision
|
||||
* and sign to positive */
|
||||
a->used = 0;
|
||||
a->alloc = 16;
|
||||
a->alloc = MP_PREC;
|
||||
a->sign = MP_ZPOS;
|
||||
|
||||
VERIFY(a);
|
||||
@ -96,8 +201,14 @@ void mp_clear(mp_int *a)
|
||||
REGFUNC("mp_clear");
|
||||
if (a->dp != NULL) {
|
||||
VERIFY(a);
|
||||
memset(a->dp, 0, sizeof(mp_digit) * a->alloc);
|
||||
|
||||
/* first zero the digits */
|
||||
memset(a->dp, 0, sizeof(mp_digit) * a->used);
|
||||
|
||||
/* free ram */
|
||||
free(a->dp);
|
||||
|
||||
/* reset members to make debugging easier */
|
||||
a->dp = NULL;
|
||||
a->alloc = a->used = 0;
|
||||
}
|
||||
@ -118,27 +229,26 @@ void mp_exch(mp_int *a, mp_int *b)
|
||||
/* grow as required */
|
||||
static int mp_grow(mp_int *a, int size)
|
||||
{
|
||||
int i;
|
||||
mp_digit *tmp;
|
||||
int i, n;
|
||||
|
||||
REGFUNC("mp_grow");
|
||||
VERIFY(a);
|
||||
|
||||
/* if the alloc size is smaller alloc more ram */
|
||||
if (a->alloc < size) {
|
||||
size += 32 - (size & 15); /* ensure there are always at least 16 digits extra on top */
|
||||
size += (MP_PREC*2) - (size & (MP_PREC-1)); /* ensure there are always at least 16 digits extra on top */
|
||||
|
||||
tmp = calloc(sizeof(mp_digit), size);
|
||||
if (tmp == NULL) {
|
||||
a->dp = realloc(a->dp, sizeof(mp_digit)*size);
|
||||
if (a->dp == NULL) {
|
||||
DECFUNC();
|
||||
return MP_MEM;
|
||||
}
|
||||
for (i = 0; i < a->used; i++) {
|
||||
tmp[i] = a->dp[i];
|
||||
}
|
||||
free(a->dp);
|
||||
a->dp = tmp;
|
||||
|
||||
n = a->alloc;
|
||||
a->alloc = size;
|
||||
for (i = n; i < a->alloc; i++) {
|
||||
a->dp[i] = 0;
|
||||
}
|
||||
}
|
||||
DECFUNC();
|
||||
return MP_OKAY;
|
||||
@ -233,8 +343,7 @@ int mp_init_size(mp_int *a, int size)
|
||||
REGFUNC("mp_init_size");
|
||||
|
||||
/* pad up so there are at least 16 zero digits */
|
||||
size += 32 - (size & 15);
|
||||
|
||||
size += (MP_PREC*2) - (size & (MP_PREC-1)); /* ensure there are always at least 16 digits extra on top */
|
||||
a->dp = calloc(sizeof(mp_digit), size);
|
||||
if (a->dp == NULL) {
|
||||
DECFUNC();
|
||||
@ -270,7 +379,6 @@ int mp_copy(mp_int *a, mp_int *b)
|
||||
}
|
||||
|
||||
/* zero b and copy the parameters over */
|
||||
mp_zero(b);
|
||||
b->used = a->used;
|
||||
b->sign = a->sign;
|
||||
|
||||
@ -278,6 +386,11 @@ int mp_copy(mp_int *a, mp_int *b)
|
||||
for (n = 0; n < a->used; n++) {
|
||||
b->dp[n] = a->dp[n];
|
||||
}
|
||||
|
||||
/* clear high digits */
|
||||
for (n = b->used; n < b->alloc; n++) {
|
||||
b->dp[n] = 0;
|
||||
}
|
||||
DECFUNC();
|
||||
return MP_OKAY;
|
||||
}
|
||||
@ -513,7 +626,7 @@ int mp_mod_2d(mp_int *a, int b, mp_int *c)
|
||||
c->dp[x] = 0;
|
||||
}
|
||||
/* clear the digit that is not completely outside/inside the modulus */
|
||||
c->dp[b/DIGIT_BIT] &= (mp_digit)((((mp_digit)1)<<(b % DIGIT_BIT)) - ((mp_digit)1));
|
||||
c->dp[b/DIGIT_BIT] &= (mp_digit)((((mp_digit)1)<<(((mp_digit)b) % DIGIT_BIT)) - ((mp_digit)1));
|
||||
mp_clamp(c);
|
||||
DECFUNC();
|
||||
return MP_OKAY;
|
||||
@ -697,7 +810,7 @@ int mp_mul_2(mp_int *a, mp_int *b)
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
||||
/* low level addition */
|
||||
/* low level addition, based on HAC pp.594, Algorithm 14.7 */
|
||||
static int s_mp_add(mp_int *a, mp_int *b, mp_int *c)
|
||||
{
|
||||
mp_int *x;
|
||||
@ -709,7 +822,9 @@ static int s_mp_add(mp_int *a, mp_int *b, mp_int *c)
|
||||
VERIFY(b);
|
||||
VERIFY(c);
|
||||
|
||||
/* find sizes */
|
||||
/* find sizes, we let |a| <= |b| which means we have to sort
|
||||
* them. "x" will point to the input with the most digits
|
||||
*/
|
||||
if (a->used > b->used) {
|
||||
min = b->used;
|
||||
max = a->used;
|
||||
@ -735,9 +850,11 @@ static int s_mp_add(mp_int *a, mp_int *b, mp_int *c)
|
||||
c->used = max + 1;
|
||||
|
||||
/* add digits from lower part */
|
||||
|
||||
/* set the carry to zero */
|
||||
u = 0;
|
||||
for (i = 0; i < min; i++) {
|
||||
/* T[i] = A[i] + B[i] + U */
|
||||
/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
|
||||
c->dp[i] = a->dp[i] + b->dp[i] + u;
|
||||
|
||||
/* U = carry bit of T[i] */
|
||||
@ -774,7 +891,7 @@ static int s_mp_add(mp_int *a, mp_int *b, mp_int *c)
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
||||
/* low level subtraction (assumes a > b) */
|
||||
/* low level subtraction (assumes a > b), HAC pp.595 Algorithm 14.9 */
|
||||
static int s_mp_sub(mp_int *a, mp_int *b, mp_int *c)
|
||||
{
|
||||
int olduse, res, min, max, i;
|
||||
@ -800,6 +917,8 @@ static int s_mp_sub(mp_int *a, mp_int *b, mp_int *c)
|
||||
c->used = max;
|
||||
|
||||
/* sub digits from lower part */
|
||||
|
||||
/* set carry to zero */
|
||||
u = 0;
|
||||
for (i = 0; i < min; i++) {
|
||||
/* T[i] = A[i] - B[i] - U */
|
||||
@ -849,9 +968,8 @@ static int s_mp_sub(mp_int *a, mp_int *b, mp_int *c)
|
||||
*/
|
||||
static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
{
|
||||
int olduse, res, pa, pb, ix, iy;
|
||||
mp_word W[512], *_W;
|
||||
mp_digit tmpx, *tmpy;
|
||||
int olduse, res, pa, ix;
|
||||
mp_word W[512];
|
||||
|
||||
REGFUNC("fast_s_mp_mul_digs");
|
||||
VERIFY(a);
|
||||
@ -866,7 +984,7 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
}
|
||||
|
||||
/* clear temp buf (the columns) */
|
||||
memset(W, 0, digs*sizeof(mp_word));
|
||||
memset(W, 0, sizeof(mp_word) * digs);
|
||||
|
||||
/* calculate the columns */
|
||||
pa = a->used;
|
||||
@ -876,21 +994,41 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
* of output are produced. So at most we want to make upto "digs" digits
|
||||
* of output
|
||||
*/
|
||||
pb = MIN(b->used, digs - ix);
|
||||
|
||||
/* setup some pointer aliases to simplify the inner loop */
|
||||
tmpx = a->dp[ix];
|
||||
tmpy = b->dp;
|
||||
_W = &(W[ix]);
|
||||
|
||||
/* this adds products to distinct columns (at ix+iy) of W
|
||||
* note that each step through the loop is not dependent on
|
||||
* the previous which means the compiler can easily unroll
|
||||
* the loop without scheduling problems
|
||||
*/
|
||||
for (iy = 0; iy < pb; iy++) {
|
||||
*_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
|
||||
{
|
||||
register mp_digit tmpx, *tmpy;
|
||||
register mp_word *_W;
|
||||
register int iy, pb;
|
||||
|
||||
/* alias for the the word on the left e.g. A[ix] * A[iy] */
|
||||
tmpx = a->dp[ix];
|
||||
|
||||
/* alias for the right side */
|
||||
tmpy = b->dp;
|
||||
|
||||
/* alias for the columns, each step through the loop adds a new
|
||||
term to each column
|
||||
*/
|
||||
_W = W + ix;
|
||||
|
||||
|
||||
/* the number of digits is limited by their placement. E.g.
|
||||
we avoid multiplying digits that will end up above the # of
|
||||
digits of precision requested
|
||||
*/
|
||||
pb = MIN(b->used, digs - ix);
|
||||
|
||||
for (iy = 0; iy < pb; iy++) {
|
||||
*_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* setup dest */
|
||||
@ -908,11 +1046,12 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
* N^2 + N*c where c is the cost of the shifting. On very small numbers
|
||||
* this is slower but on most cryptographic size numbers it is faster.
|
||||
*/
|
||||
|
||||
for (ix = 1; ix < digs; ix++) {
|
||||
W[ix] = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT));
|
||||
c->dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK);
|
||||
W[ix] += (W[ix-1] >> ((mp_word)DIGIT_BIT));
|
||||
c->dp[ix-1] = (mp_digit)(W[ix-1] & ((mp_word)MP_MASK));
|
||||
}
|
||||
c->dp[digs-1] = W[digs-1] & ((mp_word)MP_MASK);
|
||||
c->dp[digs-1] = (mp_digit)(W[digs-1] & ((mp_word)MP_MASK));
|
||||
|
||||
/* clear unused */
|
||||
for (ix = c->used; ix < olduse; ix++) {
|
||||
@ -924,7 +1063,10 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
||||
/* multiplies |a| * |b| and only computes upto digs digits of result */
|
||||
/* multiplies |a| * |b| and only computes upto digs digits of result
|
||||
* HAC pp. 595, Algorithm 14.12 Modified so you can control how many digits of
|
||||
* output are created.
|
||||
*/
|
||||
static int s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
{
|
||||
mp_int t;
|
||||
@ -963,7 +1105,7 @@ static int s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
|
||||
/* limit ourselves to making digs digits of output */
|
||||
pb = MIN(b->used, digs - ix);
|
||||
|
||||
|
||||
/* setup some aliases */
|
||||
tmpx = a->dp[ix];
|
||||
tmpt = &(t.dp[ix]);
|
||||
@ -1001,9 +1143,8 @@ static int s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
*/
|
||||
static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
{
|
||||
int oldused, newused, res, pa, pb, ix, iy;
|
||||
mp_word W[512], *_W;
|
||||
mp_digit tmpx, *tmpy;
|
||||
int oldused, newused, res, pa, pb, ix;
|
||||
mp_word W[512];
|
||||
|
||||
REGFUNC("fast_s_mp_mul_high_digs");
|
||||
VERIFY(a);
|
||||
@ -1023,14 +1164,29 @@ static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
pb = b->used;
|
||||
memset(&W[digs], 0, (pa + pb + 1 - digs) * sizeof(mp_word));
|
||||
for (ix = 0; ix < pa; ix++) {
|
||||
/* pointer aliases */
|
||||
tmpx = a->dp[ix];
|
||||
tmpy = b->dp + (digs - ix);
|
||||
_W = &(W[digs]);
|
||||
{
|
||||
register mp_digit tmpx, *tmpy;
|
||||
register int iy;
|
||||
register mp_word *_W;
|
||||
|
||||
/* work todo, that is we only calculate digits that are at "digs" or above */
|
||||
iy = digs - ix;
|
||||
|
||||
/* copy of word on the left of A[ix] * B[iy] */
|
||||
tmpx = a->dp[ix];
|
||||
|
||||
/* alias for right side */
|
||||
tmpy = b->dp + iy;
|
||||
|
||||
/* alias for the columns of output. Offset to be equal to or above the
|
||||
* smallest digit place requested
|
||||
*/
|
||||
_W = &(W[digs]);
|
||||
|
||||
/* compute column products for digits above the minimum */
|
||||
for (iy = digs - ix; iy < pb; iy++) {
|
||||
*_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
|
||||
/* compute column products for digits above the minimum */
|
||||
for (; iy < pb; iy++) {
|
||||
*_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1040,10 +1196,10 @@ static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
|
||||
/* now convert the array W downto what we need */
|
||||
for (ix = digs+1; ix < (pa+pb+1); ix++) {
|
||||
W[ix] = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT));
|
||||
c->dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK);
|
||||
W[ix] += (W[ix-1] >> ((mp_word)DIGIT_BIT));
|
||||
c->dp[ix-1] = (mp_digit)(W[ix-1] & ((mp_word)MP_MASK));
|
||||
}
|
||||
c->dp[(pa+pb+1)-1] = W[(pa+pb+1)-1] & ((mp_word)MP_MASK);
|
||||
c->dp[(pa+pb+1)-1] = (mp_digit)(W[(pa+pb+1)-1] & ((mp_word)MP_MASK));
|
||||
|
||||
for (ix = c->used; ix < oldused; ix++) {
|
||||
c->dp[ix] = 0;
|
||||
@ -1085,13 +1241,26 @@ static int s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
pa = a->used;
|
||||
pb = b->used;
|
||||
for (ix = 0; ix < pa; ix++) {
|
||||
/* clear the carry */
|
||||
u = 0;
|
||||
|
||||
/* left hand side of A[ix] * B[iy] */
|
||||
tmpx = a->dp[ix];
|
||||
|
||||
/* alias to the address of where the digits will be stored */
|
||||
tmpt = &(t.dp[digs]);
|
||||
|
||||
/* alias for where to read the right hand side from */
|
||||
tmpy = b->dp + (digs - ix);
|
||||
|
||||
for (iy = digs - ix; iy < pb; iy++) {
|
||||
/* calculate the double precision result */
|
||||
r = ((mp_word)*tmpt) + ((mp_word)tmpx) * ((mp_word)*tmpy++) + ((mp_word)u);
|
||||
|
||||
/* get the lower part */
|
||||
*tmpt++ = (mp_digit)(r & ((mp_word)MP_MASK));
|
||||
|
||||
/* carry the carry */
|
||||
u = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
|
||||
}
|
||||
*tmpt = u;
|
||||
@ -1119,9 +1288,8 @@ static int s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
|
||||
*/
|
||||
static int fast_s_mp_sqr(mp_int *a, mp_int *b)
|
||||
{
|
||||
int olduse, newused, res, ix, iy, pa;
|
||||
mp_word W2[512], W[512], *_W;
|
||||
mp_digit tmpx, *tmpy;
|
||||
int olduse, newused, res, ix, pa;
|
||||
mp_word W2[512], W[512];
|
||||
|
||||
REGFUNC("fast_s_mp_sqr");
|
||||
VERIFY(a);
|
||||
@ -1144,14 +1312,23 @@ static int fast_s_mp_sqr(mp_int *a, mp_int *b)
|
||||
/* compute the outer product */
|
||||
W2[ix+ix] += ((mp_word)a->dp[ix]) * ((mp_word)a->dp[ix]);
|
||||
|
||||
/* pointer aliasing! */
|
||||
tmpx = a->dp[ix];
|
||||
tmpy = &(a->dp[ix+1]);
|
||||
_W = &(W[ix+ix+1]);
|
||||
{
|
||||
register mp_digit tmpx, *tmpy;
|
||||
register mp_word *_W;
|
||||
register int iy;
|
||||
|
||||
/* copy of left side */
|
||||
tmpx = a->dp[ix];
|
||||
|
||||
/* alias for right side */
|
||||
tmpy = a->dp + (ix + 1);
|
||||
|
||||
_W = &(W[ix+ix+1]);
|
||||
|
||||
/* inner products */
|
||||
for (iy = ix + 1; iy < pa; iy++) {
|
||||
*_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
|
||||
/* inner products */
|
||||
for (iy = ix + 1; iy < pa; iy++) {
|
||||
*_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1168,9 +1345,9 @@ static int fast_s_mp_sqr(mp_int *a, mp_int *b)
|
||||
W[ix] += W[ix] + W2[ix];
|
||||
|
||||
W[ix] = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT));
|
||||
b->dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK);
|
||||
b->dp[ix-1] = (mp_digit)(W[ix-1] & ((mp_word)MP_MASK));
|
||||
}
|
||||
b->dp[(pa+pa+1)-1] = W[(pa+pa+1)-1] & ((mp_word)MP_MASK);
|
||||
b->dp[(pa+pa+1)-1] = (mp_digit)(W[(pa+pa+1)-1] & ((mp_word)MP_MASK));
|
||||
|
||||
/* clear high */
|
||||
for (ix = b->used; ix < olduse; ix++) {
|
||||
@ -1185,7 +1362,7 @@ static int fast_s_mp_sqr(mp_int *a, mp_int *b)
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
||||
/* low level squaring, b = a*a */
|
||||
/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
|
||||
static int s_mp_sqr(mp_int *a, mp_int *b)
|
||||
{
|
||||
mp_int t;
|
||||
@ -1212,15 +1389,34 @@ static int s_mp_sqr(mp_int *a, mp_int *b)
|
||||
t.used = pa + pa + 1;
|
||||
|
||||
for (ix = 0; ix < pa; ix++) {
|
||||
/* first calculate the digit at 2*ix */
|
||||
/* calculate double precision result */
|
||||
r = ((mp_word)t.dp[ix+ix]) + ((mp_word)a->dp[ix]) * ((mp_word)a->dp[ix]);
|
||||
|
||||
/* store lower part in result */
|
||||
t.dp[ix+ix] = (mp_digit)(r & ((mp_word)MP_MASK));
|
||||
|
||||
/* get the carry */
|
||||
u = (r >> ((mp_word)DIGIT_BIT));
|
||||
|
||||
/* left hand side of A[ix] * A[iy] */
|
||||
tmpx = a->dp[ix];
|
||||
|
||||
/* alias for where to store the results */
|
||||
tmpt = &(t.dp[ix+ix+1]);
|
||||
for (iy = ix + 1; iy < pa; iy++) {
|
||||
/* first calculate the product */
|
||||
r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
|
||||
|
||||
/* now calculate the double precision result, note we use
|
||||
* addition instead of *2 since its easier to optimize
|
||||
*/
|
||||
r = ((mp_word)*tmpt) + r + r + ((mp_word)u);
|
||||
|
||||
/* store lower part */
|
||||
*tmpt++ = (mp_digit)(r & ((mp_word)MP_MASK));
|
||||
|
||||
/* get carry */
|
||||
u = (r >> ((mp_word)DIGIT_BIT));
|
||||
}
|
||||
r = ((mp_word)*tmpt) + u;
|
||||
@ -1334,11 +1530,11 @@ int mp_sub(mp_int *a, mp_int *b, mp_int *c)
|
||||
return res;
|
||||
}
|
||||
|
||||
/* c = |a| * |b| using Karatsuba */
|
||||
/* c = |a| * |b| using Karatsuba Multiplication */
|
||||
static int mp_karatsuba_mul(mp_int *a, mp_int *b, mp_int *c)
|
||||
{
|
||||
mp_int x0, x1, y0, y1, t1, t2, x0y0, x1y1;
|
||||
int B, err, neg, x;
|
||||
int B, err, x;
|
||||
|
||||
REGFUNC("mp_karatsuba_mul");
|
||||
VERIFY(a);
|
||||
@ -1396,9 +1592,7 @@ static int mp_karatsuba_mul(mp_int *a, mp_int *b, mp_int *c)
|
||||
/* now calc x1-x0 and y1-y0 */
|
||||
if (mp_sub(&x1, &x0, &t1) != MP_OKAY) goto X1Y1; /* t1 = x1 - x0 */
|
||||
if (mp_sub(&y1, &y0, &t2) != MP_OKAY) goto X1Y1; /* t2 = y1 - y0 */
|
||||
neg = (t1.sign == t2.sign) ? MP_ZPOS : MP_NEG;
|
||||
if (mp_mul(&t1, &t2, &t1) != MP_OKAY) goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */
|
||||
t1.sign = neg;
|
||||
|
||||
/* add x0y0 */
|
||||
if (mp_add(&x0y0, &x1y1, &t2) != MP_OKAY) goto X1Y1; /* t2 = x0y0 + x1y1 */
|
||||
@ -1538,7 +1732,15 @@ int mp_sqr(mp_int *a, mp_int *b)
|
||||
}
|
||||
|
||||
|
||||
/* integer signed division. c*b + d == a [e.g. a/b, c=quotient, d=remainder] */
|
||||
/* integer signed division. c*b + d == a [e.g. a/b, c=quotient, d=remainder]
|
||||
* HAC pp.598 Algorithm 14.20
|
||||
*
|
||||
* Note that the description in HAC is horribly incomplete. For example,
|
||||
* it doesn't consider the case where digits are removed from 'x' in the inner
|
||||
* loop. It also doesn't consider the case that y has fewer than three digits, etc..
|
||||
*
|
||||
* The overall algorithm is as described as 14.20 from HAC but fixed to treat these cases.
|
||||
*/
|
||||
int mp_div(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
|
||||
{
|
||||
mp_int q, x, y, t1, t2;
|
||||
@ -1596,7 +1798,7 @@ int mp_div(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
|
||||
neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
|
||||
x.sign = y.sign = MP_ZPOS;
|
||||
|
||||
/* normalize */
|
||||
/* normalize both x and y, ensure that y >= b/2, [b == 2^DIGIT_BIT] */
|
||||
norm = 0;
|
||||
while ((y.dp[y.used-1] & (((mp_digit)1)<<(DIGIT_BIT-1))) == ((mp_digit)0)) {
|
||||
++norm;
|
||||
@ -1927,7 +2129,7 @@ int mp_expt_d(mp_int *a, mp_digit b, mp_int *c)
|
||||
return res;
|
||||
}
|
||||
|
||||
if (b & (mp_digit)(1<<(DIGIT_BIT-1))) {
|
||||
if ((b & (mp_digit)(1<<(DIGIT_BIT-1))) != 0) {
|
||||
if ((res = mp_mul(c, &g, c)) != MP_OKAY) {
|
||||
mp_clear(&g);
|
||||
DECFUNC();
|
||||
@ -2106,8 +2308,12 @@ int mp_gcd(mp_int *a, mp_int *b, mp_int *c)
|
||||
k = 0;
|
||||
while ((u.dp[0] & 1) == 0 && (v.dp[0] & 1) == 0) {
|
||||
++k;
|
||||
mp_div_2d(&u, 1, &u, NULL);
|
||||
mp_div_2d(&v, 1, &v, NULL);
|
||||
if ((res = mp_div_2d(&u, 1, &u, NULL)) != MP_OKAY) {
|
||||
goto __T;
|
||||
}
|
||||
if ((res = mp_div_2d(&v, 1, &v, NULL)) != MP_OKAY) {
|
||||
goto __T;
|
||||
}
|
||||
}
|
||||
|
||||
/* B2. Initialize */
|
||||
@ -2125,7 +2331,9 @@ int mp_gcd(mp_int *a, mp_int *b, mp_int *c)
|
||||
do {
|
||||
/* B3 (and B4). Halve t, if even */
|
||||
while (t.used != 0 && (t.dp[0] & 1) == 0) {
|
||||
mp_div_2d(&t, 1, &t, NULL);
|
||||
if ((res = mp_div_2d(&t, 1, &t, NULL)) != MP_OKAY) {
|
||||
goto __T;
|
||||
}
|
||||
}
|
||||
|
||||
/* B5. if t>0 then u=t otherwise v=-t */
|
||||
@ -2563,7 +2771,9 @@ int mp_reduce_setup(mp_int *a, mp_int *b)
|
||||
return res;
|
||||
}
|
||||
|
||||
/* reduces x mod m, assumes 0 < x < m^2, mu is precomputed via mp_reduce_setup */
|
||||
/* reduces x mod m, assumes 0 < x < m^2, mu is precomputed via mp_reduce_setup
|
||||
* From HAC pp.604 Algorithm 14.42
|
||||
*/
|
||||
int mp_reduce(mp_int *x, mp_int *m, mp_int *mu)
|
||||
{
|
||||
mp_int q;
|
||||
@ -2595,10 +2805,14 @@ int mp_reduce(mp_int *x, mp_int *m, mp_int *mu)
|
||||
mp_rshd(&q, um + 1); /* q3 = q2 / b^(k+1) */
|
||||
|
||||
/* x = x mod b^(k+1), quick (no division) */
|
||||
mp_mod_2d(x, DIGIT_BIT * (um + 1), x);
|
||||
if ((res = mp_mod_2d(x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
/* q = q * m mod b^(k+1), quick (no division) */
|
||||
s_mp_mul_digs(&q, m, &q, um + 1);
|
||||
if ((res = s_mp_mul_digs(&q, m, &q, um + 1)) != MP_OKAY) {
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
/* x = x - q */
|
||||
if((res = mp_sub(x, &q, x)) != MP_OKAY)
|
||||
@ -2626,6 +2840,11 @@ int mp_reduce(mp_int *x, mp_int *m, mp_int *mu)
|
||||
return res;
|
||||
}
|
||||
|
||||
/* computes Y == G^X mod P, HAC pp.616, Algorithm 14.85
|
||||
*
|
||||
* Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
|
||||
* The value of k changes based on the size of the exponent.
|
||||
*/
|
||||
int mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
|
||||
{
|
||||
mp_int M[64], res, mu;
|
||||
@ -2648,7 +2867,7 @@ int mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
|
||||
|
||||
/* init G array */
|
||||
for (x = 0; x < (1<<winsize); x++) {
|
||||
if ((err = mp_init(&M[x])) != MP_OKAY) {
|
||||
if ((err = mp_init_size(&M[x], 1)) != MP_OKAY) {
|
||||
for (y = 0; y < x; y++) {
|
||||
mp_clear(&M[y]);
|
||||
}
|
||||
@ -2726,7 +2945,7 @@ int mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
|
||||
break;
|
||||
}
|
||||
buf = X->dp[digidx--];
|
||||
bitcnt = DIGIT_BIT;
|
||||
bitcnt = (int)DIGIT_BIT;
|
||||
}
|
||||
|
||||
/* grab the next msb from the exponent */
|
||||
@ -2793,7 +3012,7 @@ int mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
|
||||
}
|
||||
|
||||
bitbuf <<= 1;
|
||||
if (bitbuf & (1<<winsize)) {
|
||||
if ((bitbuf & (1<<winsize)) != 0) {
|
||||
/* then multiply */
|
||||
if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
@ -3044,7 +3263,7 @@ int mp_read_radix(mp_int *a, char *str, int radix)
|
||||
return MP_VAL;
|
||||
}
|
||||
|
||||
if (*str == (unsigned char)'-') {
|
||||
if (*str == '-') {
|
||||
++str;
|
||||
neg = MP_NEG;
|
||||
} else {
|
||||
@ -3053,7 +3272,7 @@ int mp_read_radix(mp_int *a, char *str, int radix)
|
||||
|
||||
mp_zero(a);
|
||||
while (*str) {
|
||||
ch = (radix < 36) ? toupper(*str) : *str;
|
||||
ch = (char)((radix < 36) ? toupper(*str) : *str);
|
||||
for (y = 0; y < 64; y++) {
|
||||
if (ch == s_rmap[y]) {
|
||||
break;
|
||||
|
13
bn.h
13
bn.h
@ -50,7 +50,7 @@
|
||||
typedef unsigned long mp_digit;
|
||||
typedef ulong64 mp_word;
|
||||
|
||||
#define DIGIT_BIT 28U
|
||||
#define DIGIT_BIT 28
|
||||
#endif
|
||||
|
||||
#ifndef DIGIT_BIT
|
||||
@ -77,13 +77,10 @@
|
||||
typedef int mp_err;
|
||||
|
||||
/* you'll have to tune these... */
|
||||
#ifdef FAST_FPU
|
||||
#define KARATSUBA_MUL_CUTOFF 100 /* Min. number of digits before Karatsuba multiplication is used. */
|
||||
#define KARATSUBA_SQR_CUTOFF 100 /* Min. number of digits before Karatsuba squaring is used. */
|
||||
#else
|
||||
#define KARATSUBA_MUL_CUTOFF 80 /* Min. number of digits before Karatsuba multiplication is used. */
|
||||
#define KARATSUBA_SQR_CUTOFF 80 /* Min. number of digits before Karatsuba squaring is used. */
|
||||
#endif
|
||||
#define KARATSUBA_MUL_CUTOFF 80 /* Min. number of digits before Karatsuba multiplication is used. */
|
||||
#define KARATSUBA_SQR_CUTOFF 80 /* Min. number of digits before Karatsuba squaring is used. */
|
||||
|
||||
#define MP_PREC 64 /* default digits of precision */
|
||||
|
||||
typedef struct {
|
||||
int used, alloc, sign;
|
||||
|
22
bn.tex
22
bn.tex
@ -1,7 +1,7 @@
|
||||
\documentclass{article}
|
||||
\begin{document}
|
||||
|
||||
\title{LibTomMath v0.07 \\ A Free Multiple Precision Integer Library}
|
||||
\title{LibTomMath v0.08 \\ A Free Multiple Precision Integer Library}
|
||||
\author{Tom St Denis \\ tomstdenis@iahu.ca}
|
||||
\maketitle
|
||||
\newpage
|
||||
@ -484,23 +484,23 @@ Multiply & 128 & 1,426 & 451 \\
|
||||
Multiply & 256 & 2,551 & 958 \\
|
||||
Multiply & 512 & 7,913 & 2,476 \\
|
||||
Multiply & 1024 & 28,496 & 7,927 \\
|
||||
Multiply & 2048 & 109,897 & 282,24 \\
|
||||
Multiply & 4096 & 469,970 & 104,681 \\
|
||||
Multiply & 2048 & 109,897 & 28,224 \\
|
||||
Multiply & 4096 & 469,970 & 101,171 \\
|
||||
\hline
|
||||
Square & 128 & 1,319 & 511 \\
|
||||
Square & 256 & 1,776 & 947 \\
|
||||
Square & 512 & 5,399 & 2,153 \\
|
||||
Square & 1024 & 18,991 & 5,733 \\
|
||||
Square & 2048 & 72,126 & 17,621 \\
|
||||
Square & 4096 & 306,269 & 70,168 \\
|
||||
Square & 4096 & 306,269 & 67,576 \\
|
||||
\hline
|
||||
Exptmod & 512 & 32,021,586 & 4,472,406 \\
|
||||
Exptmod & 768 & 97,595,492 & 10,427,845 \\
|
||||
Exptmod & 1024 & 223,302,532 & 20,561,722 \\
|
||||
Exptmod & 2048 & 1,682,223,369 & 113,978,803 \\
|
||||
Exptmod & 2560 & 3,268,615,571 & 236,650,133 \\
|
||||
Exptmod & 3072 & 5,597,240,141 & 373,449,291 \\
|
||||
Exptmod & 4096 & 13,347,270,891 & 787,568,457
|
||||
Exptmod & 512 & 32,021,586 & 4,138,354 \\
|
||||
Exptmod & 768 & 97,595,492 & 9,840,233 \\
|
||||
Exptmod & 1024 & 223,302,532 & 20,624,553 \\
|
||||
Exptmod & 2048 & 1,682,223,369 & 114,936,361 \\
|
||||
Exptmod & 2560 & 3,268,615,571 & 229,402,426 \\
|
||||
Exptmod & 3072 & 5,597,240,141 & 367,403,840 \\
|
||||
Exptmod & 4096 & 13,347,270,891 & 779,058,433
|
||||
|
||||
\end{tabular}
|
||||
\end{center}
|
||||
|
@ -1,3 +1,11 @@
|
||||
Jan 2nd, 2003
|
||||
v0.08 -- Sped up the multipliers by moving the inner loop variables into a smaller scope
|
||||
-- Corrected a bunch of small "warnings"
|
||||
-- Added more comments
|
||||
-- Made "mtest" be able to use /dev/random, /dev/urandom or stdin for RNG data
|
||||
-- Corrected some bugs where error messages were potentially ignored
|
||||
-- add etc/pprime.c program which makes numbers which are provably prime.
|
||||
|
||||
Jan 1st, 2003
|
||||
v0.07 -- Removed alot of heap operations from core functions to speed them up
|
||||
-- Added a root finding function [and mp_sqrt macro like from MPI]
|
||||
|
26
demo.c
26
demo.c
@ -23,7 +23,6 @@ extern ulong64 rdtsc(void);
|
||||
extern void reset(void);
|
||||
#else
|
||||
|
||||
|
||||
ulong64 _tt;
|
||||
void reset(void) { _tt = clock(); }
|
||||
ulong64 rdtsc(void) { return clock() - _tt; }
|
||||
@ -33,6 +32,8 @@ ulong64 rdtsc(void) { return clock() - _tt; }
|
||||
int _ifuncs;
|
||||
#else
|
||||
extern int _ifuncs;
|
||||
extern void dump_timings(void);
|
||||
extern void reset_timings(void);
|
||||
#endif
|
||||
|
||||
void ndraw(mp_int *a, char *name)
|
||||
@ -103,11 +104,25 @@ int main(void)
|
||||
|
||||
mp_read_radix(&b, "4982748972349724892742", 10);
|
||||
mp_sub_d(&a, 1, &c);
|
||||
|
||||
#ifdef DEBUG
|
||||
mp_sqr(&a, &a);mp_sqr(&c, &c);
|
||||
mp_sqr(&a, &a);mp_sqr(&c, &c);
|
||||
mp_sqr(&a, &a);mp_sqr(&c, &c);
|
||||
reset_timings();
|
||||
#endif
|
||||
mp_exptmod(&b, &c, &a, &d);
|
||||
mp_toradix(&d, buf, 10);
|
||||
printf("b^p-1 == %s\n", buf);
|
||||
#ifdef DEBUG
|
||||
dump_timings();
|
||||
return 0;
|
||||
|
||||
#ifdef TIMER
|
||||
#endif
|
||||
|
||||
mp_toradix(&d, buf, 10);
|
||||
printf("b^p-1 == %s\n", buf);
|
||||
|
||||
|
||||
#ifdef TIMER
|
||||
mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
|
||||
mp_read_radix(&b, "340282366920938463463574607431768211455", 10);
|
||||
while (a.used * DIGIT_BIT < 8192) {
|
||||
@ -194,7 +209,8 @@ int main(void)
|
||||
}
|
||||
printf("Exponentiating %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((ulong64)35));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
|
||||
mp_read_radix(&b, "234892374891378913789237289378973232333", 10);
|
||||
|
1
etc/makefile
Normal file
1
etc/makefile
Normal file
@ -0,0 +1 @@
|
||||
CFLAGS += -I../ -Wall -W -O3 -fomit-frame-pointer -funroll-loops ../bn.c
|
281
etc/pprime.c
Normal file
281
etc/pprime.c
Normal file
@ -0,0 +1,281 @@
|
||||
/* Generates provable primes
|
||||
*
|
||||
* See http://iahu.ca:8080/papers/pp.pdf for more info.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://tom.iahu.ca
|
||||
*/
|
||||
#include <time.h>
|
||||
#include "bn.h"
|
||||
|
||||
/* fast square root */
|
||||
static mp_digit i_sqrt(mp_word x)
|
||||
{
|
||||
mp_word x1, x2;
|
||||
|
||||
x2 = x;
|
||||
do {
|
||||
x1 = x2;
|
||||
x2 = x1 - ((x1 * x1) - x)/(2*x1);
|
||||
} while (x1 != x2);
|
||||
|
||||
if (x1*x1 > x) {
|
||||
--x1;
|
||||
}
|
||||
|
||||
return x1;
|
||||
}
|
||||
|
||||
|
||||
/* generates a prime digit */
|
||||
static mp_digit prime_digit()
|
||||
{
|
||||
mp_digit r, x, y, next;
|
||||
|
||||
/* make a DIGIT_BIT-bit random number */
|
||||
for (r = x = 0; x < DIGIT_BIT; x++) {
|
||||
r = (r << 1) | (rand() & 1);
|
||||
}
|
||||
|
||||
/* now force it odd */
|
||||
r |= 1;
|
||||
|
||||
/* force it to be >30 */
|
||||
if (r < 30) {
|
||||
r += 30;
|
||||
}
|
||||
|
||||
/* get square root, since if 'r' is composite its factors must be < than this */
|
||||
y = i_sqrt(r);
|
||||
next = (y+1)*(y+1);
|
||||
|
||||
do {
|
||||
r += 2; /* next candidate */
|
||||
|
||||
/* update sqrt ? */
|
||||
if (next <= r) {
|
||||
++y;
|
||||
next = (y+1)*(y+1);
|
||||
}
|
||||
|
||||
/* loop if divisible by 3,5,7,11,13,17,19,23,29 */
|
||||
if ((r % 3) == 0) { x = 0; continue; }
|
||||
if ((r % 5) == 0) { x = 0; continue; }
|
||||
if ((r % 7) == 0) { x = 0; continue; }
|
||||
if ((r % 11) == 0) { x = 0; continue; }
|
||||
if ((r % 13) == 0) { x = 0; continue; }
|
||||
if ((r % 17) == 0) { x = 0; continue; }
|
||||
if ((r % 19) == 0) { x = 0; continue; }
|
||||
if ((r % 23) == 0) { x = 0; continue; }
|
||||
if ((r % 29) == 0) { x = 0; continue; }
|
||||
|
||||
/* now check if r is divisible by x + k={1,7,11,13,17,19,23,29} */
|
||||
for (x = 30; x <= y; x += 30) {
|
||||
if ((r % (x+1)) == 0) { x = 0; break; }
|
||||
if ((r % (x+7)) == 0) { x = 0; break; }
|
||||
if ((r % (x+11)) == 0) { x = 0; break; }
|
||||
if ((r % (x+13)) == 0) { x = 0; break; }
|
||||
if ((r % (x+17)) == 0) { x = 0; break; }
|
||||
if ((r % (x+19)) == 0) { x = 0; break; }
|
||||
if ((r % (x+23)) == 0) { x = 0; break; }
|
||||
if ((r % (x+29)) == 0) { x = 0; break; }
|
||||
}
|
||||
} while (x == 0);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* makes a prime of at least k bits */
|
||||
int pprime(int k, mp_int *p, mp_int *q)
|
||||
{
|
||||
mp_int a, b, c, n, x, y, z, v;
|
||||
int res;
|
||||
|
||||
/* single digit ? */
|
||||
if (k <= (int)DIGIT_BIT) {
|
||||
mp_set(p, prime_digit());
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
||||
if ((res = mp_init(&c)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
if ((res = mp_init(&v)) != MP_OKAY) {
|
||||
goto __C;
|
||||
}
|
||||
|
||||
/* product of first 50 primes */
|
||||
if ((res = mp_read_radix(&v, "19078266889580195013601891820992757757219839668357012055907516904309700014933909014729740190", 10)) != MP_OKAY) {
|
||||
goto __V;
|
||||
}
|
||||
|
||||
if ((res = mp_init(&a)) != MP_OKAY) {
|
||||
goto __V;
|
||||
}
|
||||
|
||||
/* set the prime */
|
||||
mp_set(&a, prime_digit());
|
||||
|
||||
if ((res = mp_init(&b)) != MP_OKAY) {
|
||||
goto __A;
|
||||
}
|
||||
|
||||
if ((res = mp_init(&n)) != MP_OKAY) {
|
||||
goto __B;
|
||||
}
|
||||
|
||||
if ((res = mp_init(&x)) != MP_OKAY) {
|
||||
goto __N;
|
||||
}
|
||||
|
||||
if ((res = mp_init(&y)) != MP_OKAY) {
|
||||
goto __X;
|
||||
}
|
||||
|
||||
if ((res = mp_init(&z)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
}
|
||||
|
||||
/* now loop making the single digit */
|
||||
while (mp_count_bits(&a) < k) {
|
||||
printf("prime is %4d bits left\r", k - mp_count_bits(&a)); fflush(stdout);
|
||||
top:
|
||||
mp_set(&b, prime_digit());
|
||||
|
||||
/* now compute z = a * b * 2 */
|
||||
if ((res = mp_mul(&a, &b, &z)) != MP_OKAY) { /* z = a * b */
|
||||
goto __Z;
|
||||
}
|
||||
|
||||
if ((res = mp_copy(&z, &c)) != MP_OKAY) { /* c = a * b */
|
||||
goto __Z;
|
||||
}
|
||||
|
||||
if ((res = mp_mul_2(&z, &z)) != MP_OKAY) { /* z = 2 * a * b */
|
||||
goto __Z;
|
||||
}
|
||||
|
||||
/* n = z + 1 */
|
||||
if ((res = mp_add_d(&z, 1, &n)) != MP_OKAY) { /* n = z + 1 */
|
||||
goto __Z;
|
||||
}
|
||||
|
||||
/* check (n, v) == 1 */
|
||||
if ((res = mp_gcd(&n, &v, &y)) != MP_OKAY) { /* y = (n, v) */
|
||||
goto __Z;
|
||||
}
|
||||
|
||||
if (mp_cmp_d(&y, 1) != MP_EQ) goto top;
|
||||
|
||||
/* now try base x=2 */
|
||||
mp_set(&x, 2);
|
||||
|
||||
/* compute x^a mod n */
|
||||
if ((res = mp_exptmod(&x, &a, &n, &y)) != MP_OKAY) { /* y = x^a mod n */
|
||||
goto __Z;
|
||||
}
|
||||
|
||||
/* if y == 1 loop */
|
||||
if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
|
||||
|
||||
/* now x^2a mod n */
|
||||
if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2a mod n */
|
||||
goto __Z;
|
||||
}
|
||||
|
||||
if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
|
||||
|
||||
/* compute x^b mod n */
|
||||
if ((res = mp_exptmod(&x, &b, &n, &y)) != MP_OKAY) { /* y = x^b mod n */
|
||||
goto __Z;
|
||||
}
|
||||
|
||||
/* if y == 1 loop */
|
||||
if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
|
||||
|
||||
/* now x^2b mod n */
|
||||
if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2b mod n */
|
||||
goto __Z;
|
||||
}
|
||||
|
||||
if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
|
||||
|
||||
|
||||
/* compute x^c mod n == x^ab mod n */
|
||||
if ((res = mp_exptmod(&x, &c, &n, &y)) != MP_OKAY) { /* y = x^ab mod n */
|
||||
goto __Z;
|
||||
}
|
||||
|
||||
/* if y == 1 loop */
|
||||
if (mp_cmp_d(&y, 1) == MP_EQ) goto top;
|
||||
|
||||
/* now compute (x^c mod n)^2 */
|
||||
if ((res = mp_sqrmod(&y, &n, &y)) != MP_OKAY) { /* y = x^2ab mod n */
|
||||
goto __Z;
|
||||
}
|
||||
|
||||
/* y should be 1 */
|
||||
if (mp_cmp_d(&y, 1) != MP_EQ) goto top;
|
||||
|
||||
/*
|
||||
{
|
||||
char buf[4096];
|
||||
|
||||
mp_toradix(&n, buf, 10);
|
||||
printf("Certificate of primality for:\n%s\n\n", buf);
|
||||
mp_toradix(&a, buf, 10);
|
||||
printf("A == \n%s\n\n", buf);
|
||||
mp_toradix(&b, buf, 10);
|
||||
printf("B == \n%s\n", buf);
|
||||
printf("----------------------------------------------------------------\n");
|
||||
}
|
||||
*/
|
||||
/* a = n */
|
||||
mp_copy(&n, &a);
|
||||
}
|
||||
|
||||
mp_exch(&n, p);
|
||||
mp_exch(&b, q);
|
||||
|
||||
res = MP_OKAY;
|
||||
__Z: mp_clear(&z);
|
||||
__Y: mp_clear(&y);
|
||||
__X: mp_clear(&x);
|
||||
__N: mp_clear(&n);
|
||||
__B: mp_clear(&b);
|
||||
__A: mp_clear(&a);
|
||||
__V: mp_clear(&v);
|
||||
__C: mp_clear(&c);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
int main(void)
|
||||
{
|
||||
mp_int p, q;
|
||||
char buf[4096];
|
||||
int k;
|
||||
clock_t t1;
|
||||
|
||||
srand(time(NULL));
|
||||
|
||||
printf("Enter # of bits: \n");
|
||||
scanf("%d", &k);
|
||||
|
||||
mp_init(&p);
|
||||
mp_init(&q);
|
||||
|
||||
t1 = clock();
|
||||
pprime(k, &p, &q);
|
||||
t1 = clock() - t1;
|
||||
|
||||
printf("\n\nTook %ld ticks, %d bits\n", t1, mp_count_bits(&p));
|
||||
|
||||
mp_toradix(&p, buf, 10);
|
||||
printf("P == %s\n", buf);
|
||||
mp_toradix(&q, buf, 10);
|
||||
printf("Q == %s\n", buf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
13
makefile
13
makefile
@ -1,14 +1,19 @@
|
||||
CC = gcc
|
||||
CFLAGS += -DDEBUG -Wall -W -O3 -fomit-frame-pointer -funroll-loops
|
||||
CFLAGS += -Wall -W -O3 -fomit-frame-pointer -funroll-loops
|
||||
|
||||
VERSION=0.07
|
||||
VERSION=0.08
|
||||
|
||||
default: test
|
||||
|
||||
test: bn.o demo.o
|
||||
test: bn.o demo.o
|
||||
$(CC) bn.o demo.o -o demo
|
||||
cd mtest ; gcc -O3 -fomit-frame-pointer -funroll-loops mtest.c -o mtest.exe -s
|
||||
|
||||
# builds the x86 demo
|
||||
test86:
|
||||
nasm -f coff timer.asm
|
||||
$(CC) -DDEBUG -DTIMER_X86 $(CFLAGS) bn.c demo.c timer.o -o demo -s
|
||||
|
||||
docdvi: bn.tex
|
||||
latex bn
|
||||
|
||||
@ -17,7 +22,7 @@ docs: docdvi
|
||||
rm -f bn.log bn.aux bn.dvi
|
||||
|
||||
clean:
|
||||
rm -f *.o *.exe mtest/*.exe bn.log bn.aux bn.dvi *.s
|
||||
rm -f *.pdf *.o *.exe mtest/*.exe etc/*.exe bn.log bn.aux bn.dvi *.s
|
||||
|
||||
zipup: clean docs
|
||||
chdir .. ; rm -rf ltm* libtommath-$(VERSION) ; mkdir libtommath-$(VERSION) ; \
|
||||
|
@ -41,7 +41,7 @@ void rand_num(mp_int *a)
|
||||
unsigned char buf[512];
|
||||
|
||||
top:
|
||||
size = 1 + ((fgetc(rng)*fgetc(rng)) % 512);
|
||||
size = 1 + ((fgetc(rng)*fgetc(rng)) % 32);
|
||||
buf[0] = (fgetc(rng)&1)?1:0;
|
||||
fread(buf+1, 1, size, rng);
|
||||
for (n = 0; n < size; n++) {
|
||||
@ -57,7 +57,7 @@ void rand_num2(mp_int *a)
|
||||
unsigned char buf[512];
|
||||
|
||||
top:
|
||||
size = 1 + ((fgetc(rng)*fgetc(rng)) % 512);
|
||||
size = 1 + ((fgetc(rng)*fgetc(rng)) % 32);
|
||||
buf[0] = (fgetc(rng)&1)?1:0;
|
||||
fread(buf+1, 1, size, rng);
|
||||
for (n = 0; n < size; n++) {
|
||||
@ -80,6 +80,13 @@ int main(void)
|
||||
mp_init(&e);
|
||||
|
||||
rng = fopen("/dev/urandom", "rb");
|
||||
if (rng == NULL) {
|
||||
rng = fopen("/dev/random", "rb");
|
||||
if (rng == NULL) {
|
||||
fprintf(stderr, "\nWarning: stdin used as random source\n\n");
|
||||
rng = stdin;
|
||||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
n = fgetc(rng) % 11;
|
||||
|
Loading…
Reference in New Issue
Block a user