added libtommath-0.04
This commit is contained in:
parent
f89172c3af
commit
e051ed159b
9
bn.h
9
bn.h
@ -83,6 +83,9 @@ int mp_init(mp_int *a);
|
|||||||
/* free a bignum */
|
/* free a bignum */
|
||||||
void mp_clear(mp_int *a);
|
void mp_clear(mp_int *a);
|
||||||
|
|
||||||
|
/* exchange two ints */
|
||||||
|
void mp_exch(mp_int *a, mp_int *b);
|
||||||
|
|
||||||
/* shrink ram required for a bignum */
|
/* shrink ram required for a bignum */
|
||||||
int mp_shrink(mp_int *a);
|
int mp_shrink(mp_int *a);
|
||||||
|
|
||||||
@ -214,7 +217,11 @@ int mp_lcm(mp_int *a, mp_int *b, mp_int *c);
|
|||||||
/* used to setup the Barrett reduction for a given modulus b */
|
/* used to setup the Barrett reduction for a given modulus b */
|
||||||
int mp_reduce_setup(mp_int *a, mp_int *b);
|
int mp_reduce_setup(mp_int *a, mp_int *b);
|
||||||
|
|
||||||
/* Barrett Reduction, computes a (mod b) with a precomputed value c */
|
/* Barrett Reduction, computes a (mod b) with a precomputed value c
|
||||||
|
*
|
||||||
|
* Assumes that 0 < a <= b^2, note if 0 > a > -(b^2) then you can merely
|
||||||
|
* compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code].
|
||||||
|
*/
|
||||||
int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
|
int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
|
||||||
|
|
||||||
/* d = a^b (mod c) */
|
/* d = a^b (mod c) */
|
||||||
|
163
bn.tex
163
bn.tex
@ -1,7 +1,7 @@
|
|||||||
\documentclass{article}
|
\documentclass{article}
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
|
||||||
\title{LibTomMath v0.03 \\ A Free Multiple Precision Integer Library}
|
\title{LibTomMath v0.04 \\ A Free Multiple Precision Integer Library}
|
||||||
\author{Tom St Denis \\ tomstdenis@iahu.ca}
|
\author{Tom St Denis \\ tomstdenis@iahu.ca}
|
||||||
\maketitle
|
\maketitle
|
||||||
\newpage
|
\newpage
|
||||||
@ -323,46 +323,161 @@ The integers are stored in big endian format as most libraries (and MPI) expect.
|
|||||||
\textbf{mp\_toradix} functions read and write (respectively) null terminated ASCII strings in a given radix. Valid values
|
\textbf{mp\_toradix} functions read and write (respectively) null terminated ASCII strings in a given radix. Valid values
|
||||||
for the radix are between 2 and 64 (inclusively).
|
for the radix are between 2 and 64 (inclusively).
|
||||||
|
|
||||||
|
\section{Function Analysis}
|
||||||
|
|
||||||
|
Throughout the function analysis the variable $N$ will denote the average size of an input to a function as measured
|
||||||
|
by the number of digits it has. The variable $W$ will denote the number of bits per word and $c$ will denote a small
|
||||||
|
constant amount of work. The big-oh notation will be abused slightly to consider numbers that do not grow to infinity.
|
||||||
|
That is we shall consider $O(N/2) \ne O(N)$ which is an abuse of the notation.
|
||||||
|
|
||||||
|
\subsection{Digit Manipulation Functions}
|
||||||
|
The class of digit manipulation functions such as \textbf{mp\_rshd}, \textbf{mp\_lshd} and \textbf{mp\_mul\_2} are all
|
||||||
|
very simple functions to analyze.
|
||||||
|
|
||||||
|
\subsubsection{mp\_rshd(mp\_int *a, int b)}
|
||||||
|
If the shift count ``b'' is less than or equal to zero the function returns without doing any work. If the
|
||||||
|
the shift count is larger than the number of digits in ``a'' then ``a'' is simply zeroed without shifting digits.
|
||||||
|
|
||||||
|
This function requires no additional memory and $O(N)$ time.
|
||||||
|
|
||||||
|
\subsubsection{mp\_lshd(mp\_int *a, int b)}
|
||||||
|
If the shift count ``b'' is less than or equal to zero the function returns success without doing any work.
|
||||||
|
|
||||||
|
This function requires $O(b)$ additional digits of memory and $O(N)$ time.
|
||||||
|
|
||||||
|
\subsubsection{mp\_div\_2d(mp\_int *a, int b, mp\_int *c, mp\_int *d)}
|
||||||
|
If the shift count ``b'' is less than or equal to zero the function places ``a'' in ``c'' and returns success.
|
||||||
|
|
||||||
|
This function requires $O(2 \cdot N)$ additional digits of memory and $O(2 \cdot N)$ time.
|
||||||
|
|
||||||
|
\subsubsection{mp\_mul\_2d(mp\_int *a, int b, mp\_int *c)}
|
||||||
|
If the shift count ``b'' is less than or equal to zero the function places ``a'' in ``c'' and returns success.
|
||||||
|
|
||||||
|
This function requires $O(N)$ additional digits of memory and $O(2 \cdot N)$ time.
|
||||||
|
|
||||||
|
\subsubsection{mp\_mod\_2d(mp\_int *a, int b, mp\_int *c)}
|
||||||
|
If the shift count ``b'' is less than or equal to zero the function places ``a'' in ``c'' and returns success.
|
||||||
|
|
||||||
|
This function requires $O(N)$ additional digits of memory and $O(2 \cdot N)$ time.
|
||||||
|
|
||||||
|
\subsection{Basic Arithmetic}
|
||||||
|
|
||||||
|
\subsubsection{mp\_cmp(mp\_int *a, mp\_int *b)}
|
||||||
|
Performs a \textbf{signed} comparison between ``a'' and ``b'' returning
|
||||||
|
\textbf{MP\_GT} is ``a'' is larger than ``b''.
|
||||||
|
|
||||||
|
This function requires no additional memory and $O(N)$ time.
|
||||||
|
|
||||||
|
\subsubsection{mp\_cmp\_mag(mp\_int *a, mp\_int *b)}
|
||||||
|
Performs a \textbf{unsigned} comparison between ``a'' and ``b'' returning
|
||||||
|
\textbf{MP\_GT} is ``a'' is larger than ``b''. Note that this comparison is unsigned which means it will report, for
|
||||||
|
example, $-5 > 3$. By comparison mp\_cmp will report $-5 < 3$.
|
||||||
|
|
||||||
|
This function requires no additional memory and $O(N)$ time.
|
||||||
|
|
||||||
|
\subsubsection{mp\_add(mp\_int *a, mp\_int *b, mp\_int *c)}
|
||||||
|
Handles the sign of the numbers correctly which means it will subtract as required, e.g. $a + -b$ turns into $a - b$.
|
||||||
|
|
||||||
|
This function requires no additional memory and $O(N)$ time.
|
||||||
|
|
||||||
|
\subsubsection{mp\_sub(mp\_int *a, mp\_int *b, mp\_int *c)}
|
||||||
|
Handles the sign of the numbers correctly which means it will add as required, e.g. $a - -b$ turns into $a + b$.
|
||||||
|
|
||||||
|
This function requires no additional memory and $O(N)$ time.
|
||||||
|
|
||||||
|
\subsubsection{mp\_mul(mp\_int *a, mp\_int *b, mp\_int *c)}
|
||||||
|
Handles the sign of the numbers correctly which means it will correct the sign of the product as required,
|
||||||
|
e.g. $a \cdot -b$ turns into $-ab$.
|
||||||
|
|
||||||
|
For relatively small inputs, that is less than 80 digits a standard baseline or comba-baseline multiplier is used. It
|
||||||
|
requires no additional memory and $O(N^2)$ time. The comba-baseline multiplier is only used if it can safely be used
|
||||||
|
without losing carry digits. The comba method is faster than the baseline method but cannot always be used which is why
|
||||||
|
both are provided. The code will automatically determine when it can be used. If the digit count is higher
|
||||||
|
than 80 for the inputs than a Karatsuba multiplier is used which requires approximately $O(6 \cdot N)$ memory and
|
||||||
|
$O(N^{lg(3)})$ time.
|
||||||
|
|
||||||
|
\subsubsection{mp\_sqr(mp\_int *a, mp\_int *b)}
|
||||||
|
For relatively small inputs, that is less than 80 digits a modified squaring or comba-squaring algorithm is used. It
|
||||||
|
requires no additional memory and $O((N^2 + N)/2)$ time. The comba-squaring method is used only if it can be safely used
|
||||||
|
without losing carry digits. After 80 digits a Karatsuba squaring algorithm is used whcih requires approximately
|
||||||
|
$O(4 \cdot N)$ memory and $O(N^{lg(3)})$ time.
|
||||||
|
|
||||||
|
\subsubsection{mp\_div(mp\_int *a, mp\_int *b, mp\_int *c, mp\_int *d)}
|
||||||
|
The quotient is placed in ``c'' and the remainder in ``d''. Either (or both) of ``c'' and ``d'' can be set to NULL
|
||||||
|
if the value is not desired.
|
||||||
|
|
||||||
|
This function requires $O(4 \cdot N)$ memory and $O(N^2 + N)$ time.
|
||||||
|
|
||||||
|
\subsection{Modular Arithmetic}
|
||||||
|
|
||||||
|
\subsubsection{mp\_addmod, mp\_submod, mp\_mulmod, mp\_sqrmod}
|
||||||
|
These functions take the time of their host function plus the time it takes to perform a division. For example,
|
||||||
|
mp\_addmod takes $O(N + (N^2 + N))$ time. Note that if you are performing many modular operations in a row with
|
||||||
|
the same modulus you should consider Barrett reductions.
|
||||||
|
|
||||||
|
NOTE: This section will be expanded upon in future releases of the library.
|
||||||
|
|
||||||
|
\subsubsection{mp\_invmod(mp\_int *a, mp\_int *b, mp\_int *c)}
|
||||||
|
This function is technically only defined for moduli who are positive and inputs that are positive. That is it will find
|
||||||
|
$c = 1/a \mbox{ (mod }b\mbox{)}$ for any $a > 0$ and $b > 0$. The function will work for negative values of $a$ since
|
||||||
|
it merely computes $c = -1 \cdot (1/{\vert a \vert}) \mbox{ (mod }b\mbox{)}$. In general the input is only
|
||||||
|
\textbf{guaranteed} to lead to a correct output if $-b < a < b$ and $(a, b) = 1$.
|
||||||
|
|
||||||
|
NOTE: This function will be revised to accept a wider range of inputs in future releases.
|
||||||
|
|
||||||
\section{Timing Analysis}
|
\section{Timing Analysis}
|
||||||
\subsection{Observed Timings}
|
\subsection{Observed Timings}
|
||||||
A simple test program ``demo.c'' was developed which builds with either MPI or LibTomMath (without modification). The
|
A simple test program ``demo.c'' was developed which builds with either MPI or LibTomMath (without modification). The
|
||||||
test was conducted on an AMD Athlon XP processor with 266Mhz DDR memory and the GCC 3.2 compiler\footnote{With build
|
test was conducted on an AMD Athlon XP processor with 266Mhz DDR memory and the GCC 3.2 compiler\footnote{With build
|
||||||
options ``-O3 -fomit-frame-pointer -funroll-loops''}. The multiplications and squarings were repeated 10,000 times
|
options ``-O3 -fomit-frame-pointer -funroll-loops''}. The multiplications and squarings were repeated 100,000 times
|
||||||
each while the modular exponentiation (exptmod) were performed 10 times each. The RDTSC (Read Time Stamp Counter) instruction
|
each while the modular exponentiation (exptmod) were performed 50 times each. The ``inversions'' refers to multiplicative
|
||||||
was used to measure the time the entire iterations took and was divided by the number of iterations to get an
|
inversions modulo an odd number of a given size. The RDTSC (Read Time Stamp Counter) instruction was used to measure the
|
||||||
average. The following results were observed.
|
time the entire iterations took and was divided by the number of iterations to get an average. The following results
|
||||||
|
were observed.
|
||||||
|
|
||||||
\begin{small}
|
\begin{small}
|
||||||
\begin{center}
|
\begin{center}
|
||||||
\begin{tabular}{c|c|c|c}
|
\begin{tabular}{c|c|c|c}
|
||||||
\hline \textbf{Operation} & \textbf{Size (bits)} & \textbf{Time with MPI (cycles)} & \textbf{Time with LibTomMath (cycles)} \\
|
\hline \textbf{Operation} & \textbf{Size (bits)} & \textbf{Time with MPI (cycles)} & \textbf{Time with LibTomMath (cycles)} \\
|
||||||
\hline
|
\hline
|
||||||
Multiply & 128 & 1,426 & 928 \\
|
Inversion & 128 & 264,083 & 172,381 \\
|
||||||
Multiply & 256 & 2,551 & 1,787 \\
|
Inversion & 256 & 549,370 & 381,237 \\
|
||||||
Multiply & 512 & 7,913 & 3,458 \\
|
Inversion & 512 & 1,675,975 & 1,212,341 \\
|
||||||
Multiply & 1024 & 28,496 & 9,271 \\
|
Inversion & 1024 & 5,237,957 & 3,114,144 \\
|
||||||
Multiply & 2048 & 109,897 & 29,917 \\
|
Inversion & 2048 & 17,871,944 & 8,137,896 \\
|
||||||
Multiply & 4096 & 469,970 & 123,934 \\
|
Inversion & 4096 & 66,610,468 & 22,469,360 \\
|
||||||
|
\hline
|
||||||
|
Multiply & 128 & 1,426 & 847 \\
|
||||||
|
Multiply & 256 & 2,551 & 1,848 \\
|
||||||
|
Multiply & 512 & 7,913 & 3,505 \\
|
||||||
|
Multiply & 1024 & 28,496 & 9,097 \\
|
||||||
|
Multiply & 2048 & 109,897 & 29,497 \\
|
||||||
|
Multiply & 4096 & 469,970 & 112,651 \\
|
||||||
\hline
|
\hline
|
||||||
Square & 128 & 1,319 & 1,230 \\
|
Square & 128 & 1,319 & 883 \\
|
||||||
Square & 256 & 1,776 & 2,131 \\
|
Square & 256 & 1,776 & 1,895 \\
|
||||||
Square & 512 & 5,399 & 3,694 \\
|
Square & 512 & 5,399 & 3,543 \\
|
||||||
Square & 1024 & 18,991 & 9,172 \\
|
Square & 1024 & 18,991 & 8,692 \\
|
||||||
Square & 2048 & 72,126 & 27,352 \\
|
Square & 2048 & 72,126 & 26,792 \\
|
||||||
Square & 4096 & 306,269 & 110,607 \\
|
Square & 4096 & 306,269 & 103,263 \\
|
||||||
\hline
|
\hline
|
||||||
Exptmod & 512 & 32,021,586 & 6,880,075 \\
|
Exptmod & 512 & 32,021,586 & 7,096,687 \\
|
||||||
Exptmod & 768 & 97,595,492 & 15,202,614 \\
|
Exptmod & 768 & 97,595,492 & 14,849,813 \\
|
||||||
Exptmod & 1024 & 223,302,532 & 28,081,865 \\
|
Exptmod & 1024 & 223,302,532 & 27,826,489 \\
|
||||||
Exptmod & 2048 & 1,682,223,369 & 146,545,454 \\
|
Exptmod & 2048 & 1,682,223,369 & 142,026,274 \\
|
||||||
Exptmod & 2560 & 3,268,615,571 & 310,970,112 \\
|
Exptmod & 2560 & 3,268,615,571 & 292,597,205 \\
|
||||||
Exptmod & 3072 & 5,597,240,141 & 480,703,712 \\
|
Exptmod & 3072 & 5,597,240,141 & 452,731,243 \\
|
||||||
Exptmod & 4096 & 13,347,270,891 & 985,918,868
|
Exptmod & 4096 & 13,347,270,891 & 941,433,401
|
||||||
|
|
||||||
\end{tabular}
|
\end{tabular}
|
||||||
\end{center}
|
\end{center}
|
||||||
\end{small}
|
\end{small}
|
||||||
|
|
||||||
|
Note that the figures do fluctuate but their magnitudes are relatively intact. The purpose of the chart is not to
|
||||||
|
get an exact timing but to compare the two libraries. For example, in all of the tests the exact time for a 512-bit
|
||||||
|
squaring operation was not the same. The observed times were all approximately 3,500 cycles, more importantly they
|
||||||
|
were always faster than the timings observed with MPI by about the same magnitude.
|
||||||
|
|
||||||
\subsection{Digit Size}
|
\subsection{Digit Size}
|
||||||
The first major constribution to the time savings is the fact that 28 bits are stored per digit instead of the MPI
|
The first major constribution to the time savings is the fact that 28 bits are stored per digit instead of the MPI
|
||||||
defualt of 16. This means in many of the algorithms the savings can be considerable. Consider a baseline multiplier
|
defualt of 16. This means in many of the algorithms the savings can be considerable. Consider a baseline multiplier
|
||||||
|
@ -1,3 +1,10 @@
|
|||||||
|
Dec 29th, 2002
|
||||||
|
v0.04 -- Fixed a memory leak in mp_to_unsigned_bin
|
||||||
|
-- optimized invmod code
|
||||||
|
-- Fixed bug in mp_div
|
||||||
|
-- use exchange instead of copy for results
|
||||||
|
-- added a bit more to the manual
|
||||||
|
|
||||||
Dec 27th, 2002
|
Dec 27th, 2002
|
||||||
v0.03 -- Sped up s_mp_mul_high_digs by not computing the carries of the lower digits
|
v0.03 -- Sped up s_mp_mul_high_digs by not computing the carries of the lower digits
|
||||||
-- Fixed a bug where mp_set_int wouldn't zero the value first and set the used member.
|
-- Fixed a bug where mp_set_int wouldn't zero the value first and set the used member.
|
||||||
|
103
demo.c
103
demo.c
@ -21,15 +21,20 @@ void reset(void) { _tt = clock(); }
|
|||||||
unsigned long long rdtsc(void) { return clock() - _tt; }
|
unsigned long long rdtsc(void) { return clock() - _tt; }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void draw(mp_int *a)
|
void ndraw(mp_int *a, char *name)
|
||||||
{
|
{
|
||||||
char buf[4096];
|
char buf[4096];
|
||||||
printf("a->used == %d\na->alloc == %d\na->sign == %d\n", a->used, a->alloc, a->sign);
|
printf("%s: ", name);
|
||||||
mp_toradix(a, buf, 10);
|
mp_toradix(a, buf, 10);
|
||||||
printf("num == %s\n", buf);
|
printf("%s\n", buf);
|
||||||
printf("\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void draw(mp_int *a)
|
||||||
|
{
|
||||||
|
ndraw(a, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
unsigned long lfsr = 0xAAAAAAAAUL;
|
unsigned long lfsr = 0xAAAAAAAAUL;
|
||||||
|
|
||||||
int lbit(void)
|
int lbit(void)
|
||||||
@ -43,7 +48,18 @@ int lbit(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef U_MPI
|
||||||
|
int mp_reduce_setup(mp_int *a, mp_int *b)
|
||||||
|
{
|
||||||
|
int res;
|
||||||
|
|
||||||
|
mp_set(a, 1);
|
||||||
|
if ((res = s_mp_lshd(a, b->used * 2)) != MP_OKAY) {
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
return mp_div(a, b, a, NULL);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
int main(void)
|
int main(void)
|
||||||
{
|
{
|
||||||
@ -51,7 +67,6 @@ int main(void)
|
|||||||
unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n;
|
unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n;
|
||||||
unsigned char cmd[4096], buf[4096];
|
unsigned char cmd[4096], buf[4096];
|
||||||
int rr;
|
int rr;
|
||||||
mp_digit tom;
|
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
int n;
|
int n;
|
||||||
@ -63,35 +78,63 @@ int main(void)
|
|||||||
mp_init(&c);
|
mp_init(&c);
|
||||||
mp_init(&d);
|
mp_init(&d);
|
||||||
mp_init(&e);
|
mp_init(&e);
|
||||||
mp_init(&f);
|
mp_init(&f);
|
||||||
|
|
||||||
mp_read_radix(&a, "-2", 10);
|
mp_read_radix(&a, "V//////////////////////////////////////////////////////////////////////////////////////", 64);
|
||||||
mp_read_radix(&b, "2", 10);
|
mp_reduce_setup(&b, &a);
|
||||||
mp_expt_d(&a, 3, &a);
|
printf("\n\n----\n\n");
|
||||||
draw(&a);
|
mp_toradix(&b, buf, 10);
|
||||||
|
printf("b == %s\n\n\n", buf);
|
||||||
|
|
||||||
|
mp_read_radix(&b, "4982748972349724892742", 10);
|
||||||
|
mp_sub_d(&a, 1, &c);
|
||||||
|
mp_exptmod(&b, &c, &a, &d);
|
||||||
|
mp_toradix(&d, buf, 10);
|
||||||
|
printf("b^p-1 == %s\n", buf);
|
||||||
|
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
|
|
||||||
mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
|
mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
|
||||||
|
mp_read_radix(&b, "234892374891378913789237289378973232333", 10);
|
||||||
while (a.used * DIGIT_BIT < 8192) {
|
while (a.used * DIGIT_BIT < 8192) {
|
||||||
reset();
|
reset();
|
||||||
for (rr = 0; rr < 100000; rr++) {
|
for (rr = 0; rr < 1000; rr++) {
|
||||||
|
mp_invmod(&b, &a, &c);
|
||||||
|
}
|
||||||
|
tt = rdtsc();
|
||||||
|
mp_mulmod(&b, &c, &a, &d);
|
||||||
|
if (mp_cmp_d(&d, 1) != MP_EQ) {
|
||||||
|
printf("Failed to invert\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
printf("Inverting mod %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((unsigned long long)1000));
|
||||||
|
mp_sqr(&a, &a);
|
||||||
|
mp_sqr(&b, &b);
|
||||||
|
}
|
||||||
|
|
||||||
|
mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
|
||||||
|
while (a.used * DIGIT_BIT < 8192) {
|
||||||
|
reset();
|
||||||
|
for (rr = 0; rr < 1000000; rr++) {
|
||||||
mp_mul(&a, &a, &b);
|
mp_mul(&a, &a, &b);
|
||||||
}
|
}
|
||||||
tt = rdtsc();
|
tt = rdtsc();
|
||||||
printf("Multiplying %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((unsigned long long)100000));
|
printf("Multiplying %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((unsigned long long)1000000));
|
||||||
mp_copy(&b, &a);
|
mp_copy(&b, &a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
|
mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
|
||||||
while (a.used * DIGIT_BIT < 8192) {
|
while (a.used * DIGIT_BIT < 8192) {
|
||||||
reset();
|
reset();
|
||||||
for (rr = 0; rr < 100000; rr++) {
|
for (rr = 0; rr < 1000000; rr++) {
|
||||||
mp_sqr(&a, &b);
|
mp_sqr(&a, &b);
|
||||||
}
|
}
|
||||||
tt = rdtsc();
|
tt = rdtsc();
|
||||||
printf("Squaring %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((unsigned long long)100000));
|
printf("Squaring %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((unsigned long long)1000000));
|
||||||
mp_copy(&b, &a);
|
mp_copy(&b, &a);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,7 +160,7 @@ int main(void)
|
|||||||
mp_mod(&b, &c, &b);
|
mp_mod(&b, &c, &b);
|
||||||
mp_set(&c, 3);
|
mp_set(&c, 3);
|
||||||
reset();
|
reset();
|
||||||
for (rr = 0; rr < 35; rr++) {
|
for (rr = 0; rr < 50; rr++) {
|
||||||
mp_exptmod(&c, &b, &a, &d);
|
mp_exptmod(&c, &b, &a, &d);
|
||||||
}
|
}
|
||||||
tt = rdtsc();
|
tt = rdtsc();
|
||||||
@ -130,7 +173,7 @@ int main(void)
|
|||||||
draw(&d);
|
draw(&d);
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
printf("Exponentiating %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((unsigned long long)35));
|
printf("Exponentiating %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((unsigned long long)50));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -141,7 +184,7 @@ int main(void)
|
|||||||
printf("%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu\r", add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, expt_n, inv_n);
|
printf("%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu\r", add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, expt_n, inv_n);
|
||||||
fgets(cmd, 4095, stdin);
|
fgets(cmd, 4095, stdin);
|
||||||
cmd[strlen(cmd)-1] = 0;
|
cmd[strlen(cmd)-1] = 0;
|
||||||
printf("%s ]\r",cmd);
|
printf("%s ]\r",cmd); fflush(stdout);
|
||||||
if (!strcmp(cmd, "mul2d")) { ++mul2d_n;
|
if (!strcmp(cmd, "mul2d")) { ++mul2d_n;
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
||||||
fgets(buf, 4095, stdin); sscanf(buf, "%d", &rr);
|
fgets(buf, 4095, stdin); sscanf(buf, "%d", &rr);
|
||||||
@ -173,7 +216,8 @@ int main(void)
|
|||||||
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
|
||||||
mp_add(&a, &b, &d);
|
mp_copy(&a, &d);
|
||||||
|
mp_add(&d, &b, &d);
|
||||||
if (mp_cmp(&c, &d) != MP_EQ) {
|
if (mp_cmp(&c, &d) != MP_EQ) {
|
||||||
printf("add %lu failure!\n", add_n);
|
printf("add %lu failure!\n", add_n);
|
||||||
draw(&a);draw(&b);draw(&c);draw(&d);
|
draw(&a);draw(&b);draw(&c);draw(&d);
|
||||||
@ -204,13 +248,13 @@ draw(&a);draw(&b);draw(&c);draw(&d);
|
|||||||
draw(&d);
|
draw(&d);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
} else if (!strcmp(cmd, "sub")) { ++sub_n;
|
} else if (!strcmp(cmd, "sub")) { ++sub_n;
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
|
||||||
mp_sub(&a, &b, &d);
|
mp_copy(&a, &d);
|
||||||
|
mp_sub(&d, &b, &d);
|
||||||
if (mp_cmp(&c, &d) != MP_EQ) {
|
if (mp_cmp(&c, &d) != MP_EQ) {
|
||||||
printf("sub %lu failure!\n", sub_n);
|
printf("sub %lu failure!\n", sub_n);
|
||||||
draw(&a);draw(&b);draw(&c);draw(&d);
|
draw(&a);draw(&b);draw(&c);draw(&d);
|
||||||
@ -220,7 +264,8 @@ draw(&a);draw(&b);draw(&c);draw(&d);
|
|||||||
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
|
||||||
mp_mul(&a, &b, &d);
|
mp_copy(&a, &d);
|
||||||
|
mp_mul(&d, &b, &d);
|
||||||
if (mp_cmp(&c, &d) != MP_EQ) {
|
if (mp_cmp(&c, &d) != MP_EQ) {
|
||||||
printf("mul %lu failure!\n", mul_n);
|
printf("mul %lu failure!\n", mul_n);
|
||||||
draw(&a);draw(&b);draw(&c);draw(&d);
|
draw(&a);draw(&b);draw(&c);draw(&d);
|
||||||
@ -242,7 +287,8 @@ draw(&a);draw(&b);draw(&c);draw(&d); draw(&e); draw(&f);
|
|||||||
} else if (!strcmp(cmd, "sqr")) { ++sqr_n;
|
} else if (!strcmp(cmd, "sqr")) { ++sqr_n;
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
||||||
mp_sqr(&a, &c);
|
mp_copy(&a, &c);
|
||||||
|
mp_sqr(&c, &c);
|
||||||
if (mp_cmp(&b, &c) != MP_EQ) {
|
if (mp_cmp(&b, &c) != MP_EQ) {
|
||||||
printf("sqr %lu failure!\n", sqr_n);
|
printf("sqr %lu failure!\n", sqr_n);
|
||||||
draw(&a);draw(&b);draw(&c);
|
draw(&a);draw(&b);draw(&c);
|
||||||
@ -252,7 +298,8 @@ draw(&a);draw(&b);draw(&c);
|
|||||||
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
|
||||||
mp_gcd(&a, &b, &d);
|
mp_copy(&a, &d);
|
||||||
|
mp_gcd(&d, &b, &d);
|
||||||
d.sign = c.sign;
|
d.sign = c.sign;
|
||||||
if (mp_cmp(&c, &d) != MP_EQ) {
|
if (mp_cmp(&c, &d) != MP_EQ) {
|
||||||
printf("gcd %lu failure!\n", gcd_n);
|
printf("gcd %lu failure!\n", gcd_n);
|
||||||
@ -263,7 +310,8 @@ draw(&a);draw(&b);draw(&c);draw(&d);
|
|||||||
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
|
||||||
mp_lcm(&a, &b, &d);
|
mp_copy(&a, &d);
|
||||||
|
mp_lcm(&d, &b, &d);
|
||||||
d.sign = c.sign;
|
d.sign = c.sign;
|
||||||
if (mp_cmp(&c, &d) != MP_EQ) {
|
if (mp_cmp(&c, &d) != MP_EQ) {
|
||||||
printf("lcm %lu failure!\n", lcm_n);
|
printf("lcm %lu failure!\n", lcm_n);
|
||||||
@ -275,7 +323,8 @@ draw(&a);draw(&b);draw(&c);draw(&d);
|
|||||||
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
|
||||||
fgets(buf, 4095, stdin); mp_read_radix(&d, buf, 10);
|
fgets(buf, 4095, stdin); mp_read_radix(&d, buf, 10);
|
||||||
mp_exptmod(&a, &b, &c, &e);
|
mp_copy(&a, &e);
|
||||||
|
mp_exptmod(&e, &b, &c, &e);
|
||||||
if (mp_cmp(&d, &e) != MP_EQ) {
|
if (mp_cmp(&d, &e) != MP_EQ) {
|
||||||
printf("expt %lu failure!\n", expt_n);
|
printf("expt %lu failure!\n", expt_n);
|
||||||
draw(&a);draw(&b);draw(&c);draw(&d); draw(&e);
|
draw(&a);draw(&b);draw(&c);draw(&d); draw(&e);
|
||||||
|
4
makefile
4
makefile
@ -1,7 +1,7 @@
|
|||||||
CC = gcc
|
CC = gcc
|
||||||
CFLAGS += -Wall -W -O3 -funroll-loops
|
CFLAGS += -DDEBUG -Wall -W -Os
|
||||||
|
|
||||||
VERSION=0.03
|
VERSION=0.04
|
||||||
|
|
||||||
default: test
|
default: test
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ void rand_num(mp_int *a)
|
|||||||
unsigned char buf[512];
|
unsigned char buf[512];
|
||||||
|
|
||||||
top:
|
top:
|
||||||
size = 1 + (fgetc(rng) % 96);
|
size = 1 + ((fgetc(rng)*fgetc(rng)) % 32);
|
||||||
buf[0] = (fgetc(rng)&1)?1:0;
|
buf[0] = (fgetc(rng)&1)?1:0;
|
||||||
fread(buf+1, 1, size, rng);
|
fread(buf+1, 1, size, rng);
|
||||||
for (n = 0; n < size; n++) {
|
for (n = 0; n < size; n++) {
|
||||||
@ -57,7 +57,7 @@ void rand_num2(mp_int *a)
|
|||||||
unsigned char buf[512];
|
unsigned char buf[512];
|
||||||
|
|
||||||
top:
|
top:
|
||||||
size = 1 + (fgetc(rng) % 128);
|
size = 1 + ((fgetc(rng)*fgetc(rng)) % 32);
|
||||||
buf[0] = (fgetc(rng)&1)?1:0;
|
buf[0] = (fgetc(rng)&1)?1:0;
|
||||||
fread(buf+1, 1, size, rng);
|
fread(buf+1, 1, size, rng);
|
||||||
for (n = 0; n < size; n++) {
|
for (n = 0; n < size; n++) {
|
||||||
@ -196,7 +196,7 @@ int main(void)
|
|||||||
mp_todecimal(&c, buf);
|
mp_todecimal(&c, buf);
|
||||||
printf("%s\n", buf);
|
printf("%s\n", buf);
|
||||||
} else if (n == 9) {
|
} else if (n == 9) {
|
||||||
/* lcm test */
|
/* exptmod test */
|
||||||
rand_num2(&a);
|
rand_num2(&a);
|
||||||
rand_num2(&b);
|
rand_num2(&b);
|
||||||
rand_num2(&c);
|
rand_num2(&c);
|
||||||
@ -216,8 +216,10 @@ int main(void)
|
|||||||
rand_num2(&a);
|
rand_num2(&a);
|
||||||
rand_num2(&b);
|
rand_num2(&b);
|
||||||
b.sign = MP_ZPOS;
|
b.sign = MP_ZPOS;
|
||||||
|
a.sign = MP_ZPOS;
|
||||||
mp_gcd(&a, &b, &c);
|
mp_gcd(&a, &b, &c);
|
||||||
if (mp_cmp_d(&c, 1) != 0) continue;
|
if (mp_cmp_d(&c, 1) != 0) continue;
|
||||||
|
if (mp_cmp_d(&b, 1) == 0) continue;
|
||||||
mp_invmod(&a, &b, &c);
|
mp_invmod(&a, &b, &c);
|
||||||
printf("invmod\n");
|
printf("invmod\n");
|
||||||
mp_todecimal(&a, buf);
|
mp_todecimal(&a, buf);
|
||||||
|
Loading…
Reference in New Issue
Block a user