diff --git a/.travis.yml b/.travis.yml index 6c78689..a269c4e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -144,6 +144,7 @@ matrix: # clang for x86-64 architecture (64-bit longs and 64-bit pointers) - env: SANITIZER=1 CONV_WARNINGS=relaxed BUILDOPTIONS='--with-cc=clang-7 --with-m64 --with-travis-valgrind' - env: SANITIZER=1 CONV_WARNINGS=strict BUILDOPTIONS='--with-cc=clang-7 --with-m64 --with-travis-valgrind' + - env: SANITIZER=1 CONV_WARNINGS=strict BUILDOPTIONS='--with-cc=clang-7 --cflags=-DMP_USE_MEMOPS --with-m64 --with-travis-valgrind' - env: SANITIZER=1 CONV_WARNINGS=strict BUILDOPTIONS='--with-cc=clang-7 --c89 --with-m64 --with-travis-valgrind' - env: SANITIZER=1 BUILDOPTIONS='--with-cc=clang-7 --with-m64 --with-travis-valgrind --cflags=-DMP_PREC=MP_MIN_PREC' - env: SANITIZER=1 BUILDOPTIONS='--with-cc=clang-6.0 --with-m64 --with-travis-valgrind' diff --git a/demo/mtest_opponent.c b/demo/mtest_opponent.c index 1beab5f..648a654 100644 --- a/demo/mtest_opponent.c +++ b/demo/mtest_opponent.c @@ -37,8 +37,8 @@ static int mtest_opponent(void) #ifndef MP_FIXED_CUTOFFS /* force KARA and TOOM to enable despite cutoffs */ - MP_KARATSUBA_SQR_CUTOFF = MP_KARATSUBA_MUL_CUTOFF = 8; - MP_TOOM_SQR_CUTOFF = MP_TOOM_MUL_CUTOFF = 16; + MP_SQR_KARATSUBA_CUTOFF = MP_MUL_KARATSUBA_CUTOFF = 8; + MP_SQR_TOOM_CUTOFF = MP_MUL_TOOM_CUTOFF = 16; #endif for (;;) { diff --git a/demo/test.c b/demo/test.c index 8e37f71..3b97873 100644 --- a/demo/test.c +++ b/demo/test.c @@ -1866,7 +1866,7 @@ LBL_ERR: return EXIT_FAILURE; } -static int test_s_mp_balance_mul(void) +static int test_s_mp_mul_balance(void) { mp_int a, b, c; @@ -1881,7 +1881,7 @@ static int test_s_mp_balance_mul(void) DO(mp_read_radix(&a, na, 64)); DO(mp_read_radix(&b, nb, 64)); - DO(s_mp_balance_mul(&a, &b, &c)); + DO(s_mp_mul_balance(&a, &b, &c)); DO(mp_read_radix(&b, nc, 64)); @@ -1896,18 +1896,18 @@ LBL_ERR: return EXIT_FAILURE; } -#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1) -static int test_s_mp_karatsuba_mul(void) +#define s_mp_mul_full(a, b, c) s_mp_mul(a, b, c, (a)->used + (b)->used + 1) +static int test_s_mp_mul_karatsuba(void) { mp_int a, b, c, d; int size; DOR(mp_init_multi(&a, &b, &c, &d, NULL)); - for (size = MP_KARATSUBA_MUL_CUTOFF; size < MP_KARATSUBA_MUL_CUTOFF + 20; size++) { + for (size = MP_MUL_KARATSUBA_CUTOFF; size < MP_MUL_KARATSUBA_CUTOFF + 20; size++) { DO(mp_rand(&a, size)); DO(mp_rand(&b, size)); - DO(s_mp_karatsuba_mul(&a, &b, &c)); - DO(s_mp_mul(&a,&b,&d)); + DO(s_mp_mul_karatsuba(&a, &b, &c)); + DO(s_mp_mul_full(&a,&b,&d)); if (mp_cmp(&c, &d) != MP_EQ) { fprintf(stderr, "Karatsuba multiplication failed at size %d\n", size); goto LBL_ERR; @@ -1921,15 +1921,15 @@ LBL_ERR: return EXIT_FAILURE; } -static int test_s_mp_karatsuba_sqr(void) +static int test_s_mp_sqr_karatsuba(void) { mp_int a, b, c; int size; DOR(mp_init_multi(&a, &b, &c, NULL)); - for (size = MP_KARATSUBA_SQR_CUTOFF; size < MP_KARATSUBA_SQR_CUTOFF + 20; size++) { + for (size = MP_SQR_KARATSUBA_CUTOFF; size < MP_SQR_KARATSUBA_CUTOFF + 20; size++) { DO(mp_rand(&a, size)); - DO(s_mp_karatsuba_sqr(&a, &b)); + DO(s_mp_sqr_karatsuba(&a, &b)); DO(s_mp_sqr(&a, &c)); if (mp_cmp(&b, &c) != MP_EQ) { fprintf(stderr, "Karatsuba squaring failed at size %d\n", size); @@ -1944,7 +1944,7 @@ LBL_ERR: return EXIT_FAILURE; } -static int test_s_mp_toom_mul(void) +static int test_s_mp_mul_toom(void) { mp_int a, b, c, d; int size; @@ -1965,10 +1965,10 @@ static int test_s_mp_toom_mul(void) DO(mp_2expt(&c, 99000 - 1000)); DO(mp_add(&b, &c, &b)); - tc_cutoff = MP_TOOM_MUL_CUTOFF; - MP_TOOM_MUL_CUTOFF = INT_MAX; + tc_cutoff = MP_MUL_TOOM_CUTOFF; + MP_MUL_TOOM_CUTOFF = INT_MAX; DO(mp_mul(&a, &b, &c)); - MP_TOOM_MUL_CUTOFF = tc_cutoff; + MP_MUL_TOOM_CUTOFF = tc_cutoff; DO(mp_mul(&a, &b, &d)); if (mp_cmp(&c, &d) != MP_EQ) { fprintf(stderr, "Toom-Cook 3-way multiplication failed for edgecase f1 * f2\n"); @@ -1976,11 +1976,11 @@ static int test_s_mp_toom_mul(void) } #endif - for (size = MP_TOOM_MUL_CUTOFF; size < MP_TOOM_MUL_CUTOFF + 20; size++) { + for (size = MP_MUL_TOOM_CUTOFF; size < MP_MUL_TOOM_CUTOFF + 20; size++) { DO(mp_rand(&a, size)); DO(mp_rand(&b, size)); - DO(s_mp_toom_mul(&a, &b, &c)); - DO(s_mp_mul(&a,&b,&d)); + DO(s_mp_mul_toom(&a, &b, &c)); + DO(s_mp_mul_full(&a,&b,&d)); if (mp_cmp(&c, &d) != MP_EQ) { fprintf(stderr, "Toom-Cook 3-way multiplication failed at size %d\n", size); goto LBL_ERR; @@ -1994,15 +1994,15 @@ LBL_ERR: return EXIT_FAILURE; } -static int test_s_mp_toom_sqr(void) +static int test_s_mp_sqr_toom(void) { mp_int a, b, c; int size; DOR(mp_init_multi(&a, &b, &c, NULL)); - for (size = MP_TOOM_SQR_CUTOFF; size < MP_TOOM_SQR_CUTOFF + 20; size++) { + for (size = MP_SQR_TOOM_CUTOFF; size < MP_SQR_TOOM_CUTOFF + 20; size++) { DO(mp_rand(&a, size)); - DO(s_mp_toom_sqr(&a, &b)); + DO(s_mp_sqr_toom(&a, &b)); DO(s_mp_sqr(&a, &c)); if (mp_cmp(&b, &c) != MP_EQ) { fprintf(stderr, "Toom-Cook 3-way squaring failed at size %d\n", size); @@ -2075,7 +2075,7 @@ static int test_s_mp_div_recursive(void) DOR(mp_init_multi(&a, &b, &c_q, &c_r, &d_q, &d_r, NULL)); - for (size = MP_KARATSUBA_MUL_CUTOFF; size < 3 * MP_KARATSUBA_MUL_CUTOFF; size += 10) { + for (size = MP_MUL_KARATSUBA_CUTOFF; size < 3 * MP_MUL_KARATSUBA_CUTOFF; size += 10) { printf("\rsizes = %d / %d", 10 * size, size); /* Relation 10:1 */ DO(mp_rand(&a, 10 * size)); @@ -2139,7 +2139,7 @@ static int test_s_mp_div_small(void) int size; DOR(mp_init_multi(&a, &b, &c_q, &c_r, &d_q, &d_r, NULL)); - for (size = 1; size < MP_KARATSUBA_MUL_CUTOFF; size += 10) { + for (size = 1; size < MP_MUL_KARATSUBA_CUTOFF; size += 10) { printf("\rsizes = %d / %d", 2 * size, size); /* Relation 10:1 */ DO(mp_rand(&a, 2 * size)); @@ -2332,11 +2332,11 @@ static int unit_tests(int argc, char **argv) T1(mp_xor, MP_XOR), T2(s_mp_div_recursive, S_MP_DIV_RECURSIVE, S_MP_DIV_SCHOOL), T2(s_mp_div_small, S_MP_DIV_SMALL, S_MP_DIV_SCHOOL), - T1(s_mp_balance_mul, S_MP_BALANCE_MUL), - T1(s_mp_karatsuba_mul, S_MP_KARATSUBA_MUL), - T1(s_mp_karatsuba_sqr, S_MP_KARATSUBA_SQR), - T1(s_mp_toom_mul, S_MP_TOOM_MUL), - T1(s_mp_toom_sqr, S_MP_TOOM_SQR) + T1(s_mp_mul_balance, S_MP_MUL_BALANCE), + T1(s_mp_mul_karatsuba, S_MP_MUL_KARATSUBA), + T1(s_mp_sqr_karatsuba, S_MP_SQR_KARATSUBA), + T1(s_mp_mul_toom, S_MP_MUL_TOOM), + T1(s_mp_sqr_toom, S_MP_SQR_TOOM) #undef T2 #undef T1 }; diff --git a/demo/timing.c b/demo/timing.c index bb2d6c4..4385a08 100644 --- a/demo/timing.c +++ b/demo/timing.c @@ -247,18 +247,18 @@ int main(int argc, char **argv) if (should_test("mulsqr", argc, argv) != 0) { /* do mult/square twice, first without karatsuba and second with */ - old_kara_m = MP_KARATSUBA_MUL_CUTOFF; - old_kara_s = MP_KARATSUBA_SQR_CUTOFF; + old_kara_m = MP_MUL_KARATSUBA_CUTOFF; + old_kara_s = MP_SQR_KARATSUBA_CUTOFF; /* currently toom-cook cut-off is too high to kick in, so we just use the karatsuba values */ old_toom_m = old_kara_m; old_toom_s = old_kara_s; for (ix = 0; ix < 3; ix++) { printf("With%s Karatsuba, With%s Toom\n", (ix == 1) ? "" : "out", (ix == 2) ? "" : "out"); - MP_KARATSUBA_MUL_CUTOFF = (ix == 1) ? old_kara_m : 9999; - MP_KARATSUBA_SQR_CUTOFF = (ix == 1) ? old_kara_s : 9999; - MP_TOOM_MUL_CUTOFF = (ix == 2) ? old_toom_m : 9999; - MP_TOOM_SQR_CUTOFF = (ix == 2) ? old_toom_s : 9999; + MP_MUL_KARATSUBA_CUTOFF = (ix == 1) ? old_kara_m : 9999; + MP_SQR_KARATSUBA_CUTOFF = (ix == 1) ? old_kara_s : 9999; + MP_MUL_TOOM_CUTOFF = (ix == 2) ? old_toom_m : 9999; + MP_SQR_TOOM_CUTOFF = (ix == 2) ? old_toom_s : 9999; log = FOPEN((ix == 0) ? "logs/mult" MP_TIMING_VERSION ".log" : (ix == 1) ? "logs/mult_kara" MP_TIMING_VERSION ".log" : "logs/mult_toom" MP_TIMING_VERSION ".log", "w"); diff --git a/etc/tune.c b/etc/tune.c index be78ce3..9657910 100644 --- a/etc/tune.c +++ b/etc/tune.c @@ -58,7 +58,7 @@ static int s_number_of_test_loops; static int s_stabilization_extra; static int s_offset = 1; -#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1) +#define s_mp_mul_full(a, b, c) s_mp_mul(a, b, c, (a)->used + (b)->used + 1) static uint64_t s_time_mul(int size) { int x; @@ -87,7 +87,7 @@ static uint64_t s_time_mul(int size) goto LBL_ERR; } if (s_check_result == 1) { - if ((e = s_mp_mul(&a,&b,&d)) != MP_OKAY) { + if ((e = s_mp_mul_full(&a,&b,&d)) != MP_OKAY) { t1 = UINT64_MAX; goto LBL_ERR; } @@ -247,8 +247,8 @@ static void s_usage(char *s) } struct cutoffs { - int KARATSUBA_MUL, KARATSUBA_SQR; - int TOOM_MUL, TOOM_SQR; + int MUL_KARATSUBA, SQR_KARATSUBA; + int MUL_TOOM, SQR_TOOM; }; const struct cutoffs max_cutoffs = @@ -256,18 +256,18 @@ const struct cutoffs max_cutoffs = static void set_cutoffs(const struct cutoffs *c) { - MP_KARATSUBA_MUL_CUTOFF = c->KARATSUBA_MUL; - MP_KARATSUBA_SQR_CUTOFF = c->KARATSUBA_SQR; - MP_TOOM_MUL_CUTOFF = c->TOOM_MUL; - MP_TOOM_SQR_CUTOFF = c->TOOM_SQR; + MP_MUL_KARATSUBA_CUTOFF = c->MUL_KARATSUBA; + MP_SQR_KARATSUBA_CUTOFF = c->SQR_KARATSUBA; + MP_MUL_TOOM_CUTOFF = c->MUL_TOOM; + MP_SQR_TOOM_CUTOFF = c->SQR_TOOM; } static void get_cutoffs(struct cutoffs *c) { - c->KARATSUBA_MUL = MP_KARATSUBA_MUL_CUTOFF; - c->KARATSUBA_SQR = MP_KARATSUBA_SQR_CUTOFF; - c->TOOM_MUL = MP_TOOM_MUL_CUTOFF; - c->TOOM_SQR = MP_TOOM_SQR_CUTOFF; + c->MUL_KARATSUBA = MP_MUL_KARATSUBA_CUTOFF; + c->SQR_KARATSUBA = MP_SQR_KARATSUBA_CUTOFF; + c->MUL_TOOM = MP_MUL_TOOM_CUTOFF; + c->SQR_TOOM = MP_SQR_TOOM_CUTOFF; } @@ -292,7 +292,7 @@ int main(int argc, char **argv) s_number_of_test_loops = 64; s_stabilization_extra = 3; - MP_ZERO_BUFFER(&args, sizeof(args)); + s_mp_zero_buf(&args, sizeof(args)); args.testmode = 0; args.verbose = 0; @@ -414,13 +414,13 @@ int main(int argc, char **argv) s_usage(argv[0]); } str = argv[opt]; - MP_KARATSUBA_MUL_CUTOFF = (int)s_strtol(str, &endptr, "[1/4] No value for MP_KARATSUBA_MUL_CUTOFF given"); + MP_MUL_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[1/4] No value for MP_MUL_KARATSUBA_CUTOFF given"); str = endptr + 1; - MP_KARATSUBA_SQR_CUTOFF = (int)s_strtol(str, &endptr, "[2/4] No value for MP_KARATSUBA_SQR_CUTOFF given"); + MP_SQR_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[2/4] No value for MP_SQR_KARATSUBA_CUTOFF given"); str = endptr + 1; - MP_TOOM_MUL_CUTOFF = (int)s_strtol(str, &endptr, "[3/4] No value for MP_TOOM_MUL_CUTOFF given"); + MP_MUL_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[3/4] No value for MP_MUL_TOOM_CUTOFF given"); str = endptr + 1; - MP_TOOM_SQR_CUTOFF = (int)s_strtol(str, &endptr, "[4/4] No value for MP_TOOM_SQR_CUTOFF given"); + MP_SQR_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[4/4] No value for MP_SQR_TOOM_CUTOFF given"); break; case 'h': s_exit_code = EXIT_SUCCESS; @@ -455,10 +455,10 @@ int main(int argc, char **argv) of the macro MP_WPARRAY in tommath.h which needs to be changed manually (to 0 (zero)). */ - T_MUL_SQR("Karatsuba multiplication", KARATSUBA_MUL, s_time_mul), - T_MUL_SQR("Karatsuba squaring", KARATSUBA_SQR, s_time_sqr), - T_MUL_SQR("Toom-Cook 3-way multiplying", TOOM_MUL, s_time_mul), - T_MUL_SQR("Toom-Cook 3-way squaring", TOOM_SQR, s_time_sqr), + T_MUL_SQR("Karatsuba multiplication", MUL_KARATSUBA, s_time_mul), + T_MUL_SQR("Karatsuba squaring", SQR_KARATSUBA, s_time_sqr), + T_MUL_SQR("Toom-Cook 3-way multiplying", MUL_TOOM, s_time_mul), + T_MUL_SQR("Toom-Cook 3-way squaring", SQR_TOOM, s_time_sqr), #undef T_MUL_SQR }; /* Turn all limits from bncore.c to the max */ @@ -473,15 +473,15 @@ int main(int argc, char **argv) } if (args.terse == 1) { printf("%d %d %d %d\n", - updated.KARATSUBA_MUL, - updated.KARATSUBA_SQR, - updated.TOOM_MUL, - updated.TOOM_SQR); + updated.MUL_KARATSUBA, + updated.SQR_KARATSUBA, + updated.MUL_TOOM, + updated.SQR_TOOM); } else { - printf("KARATSUBA_MUL_CUTOFF = %d\n", updated.KARATSUBA_MUL); - printf("KARATSUBA_SQR_CUTOFF = %d\n", updated.KARATSUBA_SQR); - printf("TOOM_MUL_CUTOFF = %d\n", updated.TOOM_MUL); - printf("TOOM_SQR_CUTOFF = %d\n", updated.TOOM_SQR); + printf("MUL_KARATSUBA_CUTOFF = %d\n", updated.MUL_KARATSUBA); + printf("SQR_KARATSUBA_CUTOFF = %d\n", updated.SQR_KARATSUBA); + printf("MUL_TOOM_CUTOFF = %d\n", updated.MUL_TOOM); + printf("SQR_TOOM_CUTOFF = %d\n", updated.SQR_TOOM); } if (args.print == 1) { @@ -526,15 +526,15 @@ int main(int argc, char **argv) set_cutoffs(&orig); if (args.terse == 1) { printf("%d %d %d %d\n", - MP_KARATSUBA_MUL_CUTOFF, - MP_KARATSUBA_SQR_CUTOFF, - MP_TOOM_MUL_CUTOFF, - MP_TOOM_SQR_CUTOFF); + MP_MUL_KARATSUBA_CUTOFF, + MP_SQR_KARATSUBA_CUTOFF, + MP_MUL_TOOM_CUTOFF, + MP_SQR_TOOM_CUTOFF); } else { - printf("KARATSUBA_MUL_CUTOFF = %d\n", MP_KARATSUBA_MUL_CUTOFF); - printf("KARATSUBA_SQR_CUTOFF = %d\n", MP_KARATSUBA_SQR_CUTOFF); - printf("TOOM_MUL_CUTOFF = %d\n", MP_TOOM_MUL_CUTOFF); - printf("TOOM_SQR_CUTOFF = %d\n", MP_TOOM_SQR_CUTOFF); + printf("MUL_KARATSUBA_CUTOFF = %d\n", MP_MUL_KARATSUBA_CUTOFF); + printf("SQR_KARATSUBA_CUTOFF = %d\n", MP_SQR_KARATSUBA_CUTOFF); + printf("MUL_TOOM_CUTOFF = %d\n", MP_MUL_TOOM_CUTOFF); + printf("SQR_TOOM_CUTOFF = %d\n", MP_SQR_TOOM_CUTOFF); } } } diff --git a/etc/tune_it.sh b/etc/tune_it.sh index 5e0fe7c..dba5b69 100755 --- a/etc/tune_it.sh +++ b/etc/tune_it.sh @@ -93,15 +93,14 @@ i=$(tail -n +2 $FILE_NAME | wc -l) # our median point will be at $i entries i=$(( (i / 2) + 1 )) TMP=$(median $FILE_NAME 1 $i) -echo "#define MP_DEFAULT_KARATSUBA_MUL_CUTOFF $TMP" -echo "#define MP_DEFAULT_KARATSUBA_MUL_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(km) Appending to $TOMMATH_CUTOFFS_H" $? +echo "#define MP_DEFAULT_MUL_KARATSUBA_CUTOFF $TMP" +echo "#define MP_DEFAULT_MUL_KARATSUBA_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(km) Appending to $TOMMATH_CUTOFFS_H" $? TMP=$(median $FILE_NAME 2 $i) -echo "#define MP_DEFAULT_KARATSUBA_SQR_CUTOFF $TMP" -echo "#define MP_DEFAULT_KARATSUBA_SQR_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(ks) Appending to $TOMMATH_CUTOFFS_H" $? +echo "#define MP_DEFAULT_SQR_KARATSUBA_CUTOFF $TMP" +echo "#define MP_DEFAULT_SQR_KARATSUBA_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(ks) Appending to $TOMMATH_CUTOFFS_H" $? TMP=$(median $FILE_NAME 3 $i) -echo "#define MP_DEFAULT_TOOM_MUL_CUTOFF $TMP" -echo "#define MP_DEFAULT_TOOM_MUL_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(tc3m) Appending to $TOMMATH_CUTOFFS_H" $? +echo "#define MP_DEFAULT_MUL_TOOM_CUTOFF $TMP" +echo "#define MP_DEFAULT_MUL_TOOM_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(tc3m) Appending to $TOMMATH_CUTOFFS_H" $? TMP=$(median $FILE_NAME 4 $i) -echo "#define MP_DEFAULT_TOOM_SQR_CUTOFF $TMP" -echo "#define MP_DEFAULT_TOOM_SQR_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(tc3s) Appending to $TOMMATH_CUTOFFS_H" $? - +echo "#define MP_DEFAULT_SQR_TOOM_CUTOFF $TMP" +echo "#define MP_DEFAULT_SQR_TOOM_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(tc3s) Appending to $TOMMATH_CUTOFFS_H" $? diff --git a/helper.pl b/helper.pl index 223c7ef..93ca40a 100755 --- a/helper.pl +++ b/helper.pl @@ -57,9 +57,8 @@ sub check_source { push @{$troubles->{unwanted_calloc}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\bcalloc\s*\(/; push @{$troubles->{unwanted_free}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\bfree\s*\(/; # and we probably want to also avoid the following - push @{$troubles->{unwanted_memcpy}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\bmemcpy\s*\(/; - push @{$troubles->{unwanted_memset}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\bmemset\s*\(/; - push @{$troubles->{unwanted_memcpy}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\bmemcpy\s*\(/; + push @{$troubles->{unwanted_memcpy}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\bmemcpy\s*\(/ && $file !~ /s_mp_copy_digs.c/; + push @{$troubles->{unwanted_memset}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\bmemset\s*\(/ && $file !~ /s_mp_zero_buf.c/ && $file !~ /s_mp_zero_digs.c/; push @{$troubles->{unwanted_memmove}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\bmemmove\s*\(/; push @{$troubles->{unwanted_memcmp}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\bmemcmp\s*\(/; push @{$troubles->{unwanted_strcmp}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\bstrcmp\s*\(/; diff --git a/libtommath_VS2008.vcproj b/libtommath_VS2008.vcproj index 116275e..07c784b 100644 --- a/libtommath_VS2008.vcproj +++ b/libtommath_VS2008.vcproj @@ -576,10 +576,6 @@ RelativePath="mp_mod_2d.c" > - - @@ -833,7 +829,7 @@ > - - - - + + + + + + + + + + alloc < (a->used + 1)) { - if ((err = mp_grow(c, a->used + 1)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(c, a->used + 1)) != MP_OKAY) { + return err; } /* if a is negative and |a| >= b, call c = |a| - b */ @@ -53,49 +50,34 @@ mp_err mp_add_d(const mp_int *a, mp_digit b, mp_int *c) /* old number of used digits in c */ oldused = c->used; - /* source alias */ - tmpa = a->dp; - - /* destination alias */ - tmpc = c->dp; - /* if a is positive */ if (a->sign == MP_ZPOS) { /* add digits, mu is carry */ + int i; mp_digit mu = b; - for (ix = 0; ix < a->used; ix++) { - *tmpc = *tmpa++ + mu; - mu = *tmpc >> MP_DIGIT_BIT; - *tmpc++ &= MP_MASK; + for (i = 0; i < a->used; i++) { + c->dp[i] = a->dp[i] + mu; + mu = c->dp[i] >> MP_DIGIT_BIT; + c->dp[i] &= MP_MASK; } /* set final carry */ - ix++; - *tmpc++ = mu; + c->dp[i] = mu; /* setup size */ c->used = a->used + 1; } else { /* a was negative and |a| < b */ - c->used = 1; + c->used = 1; /* the result is a single digit */ - if (a->used == 1) { - *tmpc++ = b - a->dp[0]; - } else { - *tmpc++ = b; - } - - /* setup count so the clearing of oldused - * can fall through correctly - */ - ix = 1; + c->dp[0] = (a->used == 1) ? b - a->dp[0] : b; } /* sign always positive */ c->sign = MP_ZPOS; /* now zero to oldused */ - MP_ZERO_DIGITS(tmpc, oldused - ix); + s_mp_zero_digs(c->dp + c->used, oldused - c->used); mp_clamp(c); return MP_OKAY; diff --git a/mp_and.c b/mp_and.c index 92e6aed..a865ae0 100644 --- a/mp_and.c +++ b/mp_and.c @@ -11,10 +11,8 @@ mp_err mp_and(const mp_int *a, const mp_int *b, mp_int *c) mp_digit ac = 1, bc = 1, cc = 1; mp_sign csign = ((a->sign == MP_NEG) && (b->sign == MP_NEG)) ? MP_NEG : MP_ZPOS; - if (c->alloc < used) { - if ((err = mp_grow(c, used)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(c, used)) != MP_OKAY) { + return err; } for (i = 0; i < used; i++) { diff --git a/mp_clear.c b/mp_clear.c index 55d76b2..11094b2 100644 --- a/mp_clear.c +++ b/mp_clear.c @@ -9,7 +9,7 @@ void mp_clear(mp_int *a) /* only do anything if a hasn't been freed previously */ if (a->dp != NULL) { /* free ram */ - MP_FREE_DIGITS(a->dp, a->alloc); + MP_FREE_DIGS(a->dp, a->alloc); /* reset members to make debugging easier */ a->dp = NULL; diff --git a/mp_complement.c b/mp_complement.c index ad6bed3..c16e25f 100644 --- a/mp_complement.c +++ b/mp_complement.c @@ -6,7 +6,8 @@ /* b = ~a */ mp_err mp_complement(const mp_int *a, mp_int *b) { - mp_err err = mp_neg(a, b); - return (err == MP_OKAY) ? mp_sub_d(b, 1uL, b) : err; + mp_int a_ = *a; + a_.sign = ((a_.sign == MP_ZPOS) && !mp_iszero(a)) ? MP_NEG : MP_ZPOS; + return mp_sub_d(&a_, 1uL, b); } #endif diff --git a/mp_copy.c b/mp_copy.c index cf4d5e0..d79e2b8 100644 --- a/mp_copy.c +++ b/mp_copy.c @@ -6,7 +6,7 @@ /* copy, b = a */ mp_err mp_copy(const mp_int *a, mp_int *b) { - int n; + mp_err err; /* if dst == src do nothing */ if (a == b) { @@ -14,26 +14,16 @@ mp_err mp_copy(const mp_int *a, mp_int *b) } /* grow dest */ - if (b->alloc < a->used) { - mp_err err; - if ((err = mp_grow(b, a->used)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(b, a->used)) != MP_OKAY) { + return err; } - /* zero b and copy the parameters over */ - - /* copy all the digits */ - for (n = 0; n < a->used; n++) { - b->dp[n] = a->dp[n]; - } - - /* clear high digits */ - MP_ZERO_DIGITS(b->dp + a->used, b->used - a->used); - - /* copy used count and sign */ + /* copy everything over and zero high digits */ + s_mp_copy_digs(b->dp, a->dp, a->used); + s_mp_zero_digs(b->dp + a->used, b->used - a->used); b->used = a->used; b->sign = a->sign; + return MP_OKAY; } #endif diff --git a/mp_cutoffs.c b/mp_cutoffs.c index 46b04ef..45b0bee 100644 --- a/mp_cutoffs.c +++ b/mp_cutoffs.c @@ -5,10 +5,10 @@ #ifndef MP_FIXED_CUTOFFS #include "tommath_cutoffs.h" -int MP_KARATSUBA_MUL_CUTOFF = MP_DEFAULT_KARATSUBA_MUL_CUTOFF, - MP_KARATSUBA_SQR_CUTOFF = MP_DEFAULT_KARATSUBA_SQR_CUTOFF, - MP_TOOM_MUL_CUTOFF = MP_DEFAULT_TOOM_MUL_CUTOFF, - MP_TOOM_SQR_CUTOFF = MP_DEFAULT_TOOM_SQR_CUTOFF; +int MP_MUL_KARATSUBA_CUTOFF = MP_DEFAULT_MUL_KARATSUBA_CUTOFF, + MP_SQR_KARATSUBA_CUTOFF = MP_DEFAULT_SQR_KARATSUBA_CUTOFF, + MP_MUL_TOOM_CUTOFF = MP_DEFAULT_MUL_TOOM_CUTOFF, + MP_SQR_TOOM_CUTOFF = MP_DEFAULT_SQR_TOOM_CUTOFF; #endif #endif diff --git a/mp_div.c b/mp_div.c index 05b96dd..cbc52e8 100644 --- a/mp_div.c +++ b/mp_div.c @@ -26,7 +26,7 @@ mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) } if (MP_HAS(S_MP_DIV_RECURSIVE) - && (b->used > MP_KARATSUBA_MUL_CUTOFF) + && (b->used > MP_MUL_KARATSUBA_CUTOFF) && (b->used <= ((a->used/3)*2))) { err = s_mp_div_recursive(a, b, c, d); } else if (MP_HAS(S_MP_DIV_SCHOOL)) { diff --git a/mp_div_2.c b/mp_div_2.c index 60bd63d..8ab9bcb 100644 --- a/mp_div_2.c +++ b/mp_div_2.c @@ -6,41 +6,32 @@ /* b = a/2 */ mp_err mp_div_2(const mp_int *a, mp_int *b) { - int x, oldused; - mp_digit r, rr, *tmpa, *tmpb; mp_err err; + int x, oldused; + mp_digit r; - /* copy */ - if (b->alloc < a->used) { - if ((err = mp_grow(b, a->used)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(b, a->used)) != MP_OKAY) { + return err; } oldused = b->used; b->used = a->used; - /* source alias */ - tmpa = a->dp + b->used - 1; - - /* dest alias */ - tmpb = b->dp + b->used - 1; - /* carry */ r = 0; - for (x = b->used - 1; x >= 0; x--) { + for (x = b->used; x --> 0;) { /* get the carry for the next iteration */ - rr = *tmpa & 1u; + mp_digit rr = a->dp[x] & 1u; /* shift the current digit, add in carry and store */ - *tmpb-- = (*tmpa-- >> 1) | (r << (MP_DIGIT_BIT - 1)); + b->dp[x] = (a->dp[x] >> 1) | (r << (MP_DIGIT_BIT - 1)); /* forward carry to next iteration */ r = rr; } /* zero excess digits */ - MP_ZERO_DIGITS(b->dp + b->used, oldused - b->used); + s_mp_zero_digs(b->dp + b->used, oldused - b->used); b->sign = a->sign; mp_clamp(b); diff --git a/mp_div_2d.c b/mp_div_2d.c index 9b396ac..e523465 100644 --- a/mp_div_2d.c +++ b/mp_div_2d.c @@ -6,23 +6,16 @@ /* shift right by a certain bit count (store quotient in c, optional remainder in d) */ mp_err mp_div_2d(const mp_int *a, int b, mp_int *c, mp_int *d) { - mp_digit D, r, rr; - int x; mp_err err; - /* if the shift count is <= 0 then we do no work */ - if (b <= 0) { - err = mp_copy(a, c); - if (d != NULL) { - mp_zero(d); - } - return err; + if (b < 0) { + return MP_VAL; } - /* copy */ if ((err = mp_copy(a, c)) != MP_OKAY) { return err; } + /* 'a' should not be used after here - it might be the same as d */ /* get the remainder */ @@ -38,28 +31,25 @@ mp_err mp_div_2d(const mp_int *a, int b, mp_int *c, mp_int *d) } /* shift any bit count < MP_DIGIT_BIT */ - D = (mp_digit)(b % MP_DIGIT_BIT); - if (D != 0u) { - mp_digit *tmpc, mask, shift; + b %= MP_DIGIT_BIT; + if (b != 0u) { + int x; + mp_digit r, mask, shift; /* mask */ - mask = ((mp_digit)1 << D) - 1uL; + mask = ((mp_digit)1 << b) - 1uL; /* shift for lsb */ - shift = (mp_digit)MP_DIGIT_BIT - D; - - /* alias */ - tmpc = c->dp + (c->used - 1); + shift = (mp_digit)(MP_DIGIT_BIT - b); /* carry */ r = 0; - for (x = c->used - 1; x >= 0; x--) { + for (x = c->used; x --> 0;) { /* get the lower bits of this word in a temp */ - rr = *tmpc & mask; + mp_digit rr = c->dp[x] & mask; /* shift the current word and mix in the carry bits from the previous word */ - *tmpc = (*tmpc >> D) | (r << shift); - --tmpc; + c->dp[x] = (c->dp[x] >> b) | (r << shift); /* set the carry to the carry bits of the current word found above */ r = rr; diff --git a/mp_div_d.c b/mp_div_d.c index 98b6b24..472ab27 100644 --- a/mp_div_d.c +++ b/mp_div_d.c @@ -8,7 +8,6 @@ mp_err mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d) { mp_int q; mp_word w; - mp_digit t; mp_err err; int ix; @@ -56,14 +55,12 @@ mp_err mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d) q.used = a->used; q.sign = a->sign; w = 0; - for (ix = a->used - 1; ix >= 0; ix--) { + for (ix = a->used; ix --> 0;) { + mp_digit t = 0; w = (w << (mp_word)MP_DIGIT_BIT) | (mp_word)a->dp[ix]; - if (w >= b) { t = (mp_digit)(w / b); w -= (mp_word)t * (mp_word)b; - } else { - t = 0; } q.dp[ix] = t; } @@ -78,7 +75,7 @@ mp_err mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d) } mp_clear(&q); - return err; + return MP_OKAY; } #endif diff --git a/mp_dr_reduce.c b/mp_dr_reduce.c index fba0e21..f0f6f35 100644 --- a/mp_dr_reduce.c +++ b/mp_dr_reduce.c @@ -19,59 +19,49 @@ */ mp_err mp_dr_reduce(mp_int *x, const mp_int *n, mp_digit k) { - mp_err err; - int i, m; - mp_word r; - mp_digit mu, *tmpx1, *tmpx2; + mp_err err; /* m = digits in modulus */ - m = n->used; + int m = n->used; /* ensure that "x" has at least 2m digits */ - if (x->alloc < (m + m)) { - if ((err = mp_grow(x, m + m)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(x, m + m)) != MP_OKAY) { + return err; } /* top of loop, this is where the code resumes if * another reduction pass is required. */ -top: - /* aliases for digits */ - /* alias for lower half of x */ - tmpx1 = x->dp; + for (;;) { + int i; + mp_digit mu = 0; - /* alias for upper half of x, or x/B**m */ - tmpx2 = x->dp + m; + /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ + for (i = 0; i < m; i++) { + mp_word r = ((mp_word)x->dp[i + m] * (mp_word)k) + x->dp[i] + mu; + x->dp[i] = (mp_digit)(r & MP_MASK); + mu = (mp_digit)(r >> ((mp_word)MP_DIGIT_BIT)); + } - /* set carry to zero */ - mu = 0; + /* set final carry */ + x->dp[i] = mu; - /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ - for (i = 0; i < m; i++) { - r = ((mp_word)*tmpx2++ * (mp_word)k) + *tmpx1 + mu; - *tmpx1++ = (mp_digit)(r & MP_MASK); - mu = (mp_digit)(r >> ((mp_word)MP_DIGIT_BIT)); - } + /* zero words above m */ + s_mp_zero_digs(x->dp + m + 1, (x->used - m) - 1); - /* set final carry */ - *tmpx1++ = mu; + /* clamp, sub and return */ + mp_clamp(x); - /* zero words above m */ - MP_ZERO_DIGITS(tmpx1, (x->used - m) - 1); + /* if x >= n then subtract and reduce again + * Each successive "recursion" makes the input smaller and smaller. + */ + if (mp_cmp_mag(x, n) == MP_LT) { + break; + } - /* clamp, sub and return */ - mp_clamp(x); - - /* if x >= n then subtract and reduce again - * Each successive "recursion" makes the input smaller and smaller. - */ - if (mp_cmp_mag(x, n) != MP_LT) { if ((err = s_mp_sub(x, n, x)) != MP_OKAY) { return err; } - goto top; } return MP_OKAY; } diff --git a/mp_from_ubin.c b/mp_from_ubin.c index ae79be3..8272185 100644 --- a/mp_from_ubin.c +++ b/mp_from_ubin.c @@ -9,10 +9,8 @@ mp_err mp_from_ubin(mp_int *a, const uint8_t *buf, size_t size) mp_err err; /* make sure there are at least two digits */ - if (a->alloc < 2) { - if ((err = mp_grow(a, 2)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(a, 2)) != MP_OKAY) { + return err; } /* zero the int */ diff --git a/mp_fwrite.c b/mp_fwrite.c index 42d7287..6b8ea13 100644 --- a/mp_fwrite.c +++ b/mp_fwrite.c @@ -25,7 +25,7 @@ mp_err mp_fwrite(const mp_int *a, int radix, FILE *stream) } } - MP_FREE_BUFFER(buf, size); + MP_FREE_BUF(buf, size); return err; } #endif diff --git a/mp_grow.c b/mp_grow.c index 25be5ed..0de6679 100644 --- a/mp_grow.c +++ b/mp_grow.c @@ -26,7 +26,7 @@ mp_err mp_grow(mp_int *a, int size) a->dp = dp; /* zero excess digits */ - MP_ZERO_DIGITS(a->dp + a->alloc, size - a->alloc); + s_mp_zero_digs(a->dp + a->alloc, size - a->alloc); a->alloc = size; } return MP_OKAY; diff --git a/mp_invmod.c b/mp_invmod.c index e43eb3e..94929cc 100644 --- a/mp_invmod.c +++ b/mp_invmod.c @@ -12,12 +12,12 @@ mp_err mp_invmod(const mp_int *a, const mp_int *b, mp_int *c) } /* if the modulus is odd we can use a faster routine instead */ - if (MP_HAS(S_MP_INVMOD_FAST) && mp_isodd(b)) { - return s_mp_invmod_fast(a, b, c); + if (MP_HAS(S_MP_INVMOD_ODD) && mp_isodd(b)) { + return s_mp_invmod_odd(a, b, c); } - return MP_HAS(S_MP_INVMOD_SLOW) - ? s_mp_invmod_slow(a, b, c) + return MP_HAS(S_MP_INVMOD) + ? s_mp_invmod(a, b, c) : MP_VAL; } #endif diff --git a/mp_lshd.c b/mp_lshd.c index 6c14402..bfa8af8 100644 --- a/mp_lshd.c +++ b/mp_lshd.c @@ -6,6 +6,7 @@ /* shift left a certain amount of digits */ mp_err mp_lshd(mp_int *a, int b) { + mp_err err; int x; /* if its less than zero return */ @@ -18,11 +19,8 @@ mp_err mp_lshd(mp_int *a, int b) } /* grow to fit the new digits */ - if (a->alloc < (a->used + b)) { - mp_err err; - if ((err = mp_grow(a, a->used + b)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(a, a->used + b)) != MP_OKAY) { + return err; } /* increment the used by the shift amount then copy upwards */ @@ -37,7 +35,7 @@ mp_err mp_lshd(mp_int *a, int b) } /* zero the lower digits */ - MP_ZERO_DIGITS(a->dp, b); + s_mp_zero_digs(a->dp, b); return MP_OKAY; } diff --git a/mp_mod_2d.c b/mp_mod_2d.c index a94a314..82c64f0 100644 --- a/mp_mod_2d.c +++ b/mp_mod_2d.c @@ -29,7 +29,7 @@ mp_err mp_mod_2d(const mp_int *a, int b, mp_int *c) /* zero digits above the last digit of the modulus */ x = (b / MP_DIGIT_BIT) + (((b % MP_DIGIT_BIT) == 0) ? 0 : 1); - MP_ZERO_DIGITS(c->dp + x, c->used - x); + s_mp_zero_digs(c->dp + x, c->used - x); /* clear the digit that is not completely outside/inside the modulus */ c->dp[b / MP_DIGIT_BIT] &= diff --git a/mp_mod_d.c b/mp_mod_d.c deleted file mode 100644 index 3f7e191..0000000 --- a/mp_mod_d.c +++ /dev/null @@ -1,10 +0,0 @@ -#include "tommath_private.h" -#ifdef MP_MOD_D_C -/* LibTomMath, multiple-precision integer library -- Tom St Denis */ -/* SPDX-License-Identifier: Unlicense */ - -mp_err mp_mod_d(const mp_int *a, mp_digit b, mp_digit *c) -{ - return mp_div_d(a, b, NULL, c); -} -#endif diff --git a/mp_montgomery_reduce.c b/mp_montgomery_reduce.c index a872aba..de6a900 100644 --- a/mp_montgomery_reduce.c +++ b/mp_montgomery_reduce.c @@ -6,9 +6,8 @@ /* computes xR**-1 == x (mod N) via Montgomery Reduction */ mp_err mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho) { - int ix, digs; - mp_err err; - mp_digit mu; + mp_err err; + int ix, digs; /* can the fast reduction [comba] method be used? * @@ -20,18 +19,19 @@ mp_err mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho) if ((digs < MP_WARRAY) && (x->used <= MP_WARRAY) && (n->used < MP_MAXFAST)) { - return s_mp_montgomery_reduce_fast(x, n, rho); + return s_mp_montgomery_reduce_comba(x, n, rho); } /* grow the input as required */ - if (x->alloc < digs) { - if ((err = mp_grow(x, digs)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(x, digs)) != MP_OKAY) { + return err; } x->used = digs; for (ix = 0; ix < n->used; ix++) { + int iy; + mp_digit u, mu; + /* mu = ai * rho mod b * * The value of rho must be precalculated via @@ -43,41 +43,28 @@ mp_err mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho) mu = (mp_digit)(((mp_word)x->dp[ix] * (mp_word)rho) & MP_MASK); /* a = a + mu * m * b**i */ - { - int iy; - mp_digit *tmpn, *tmpx, u; - mp_word r; - /* alias for digits of the modulus */ - tmpn = n->dp; + /* Multiply and add in place */ + u = 0; + for (iy = 0; iy < n->used; iy++) { + /* compute product and sum */ + mp_word r = ((mp_word)mu * (mp_word)n->dp[iy]) + + (mp_word)u + (mp_word)x->dp[ix + iy]; - /* alias for the digits of x [the input] */ - tmpx = x->dp + ix; + /* get carry */ + u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); - /* set the carry to zero */ - u = 0; + /* fix digit */ + x->dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK); + } + /* At this point the ix'th digit of x should be zero */ - /* Multiply and add in place */ - for (iy = 0; iy < n->used; iy++) { - /* compute product and sum */ - r = ((mp_word)mu * (mp_word)*tmpn++) + - (mp_word)u + (mp_word)*tmpx; - - /* get carry */ - u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); - - /* fix digit */ - *tmpx++ = (mp_digit)(r & (mp_word)MP_MASK); - } - /* At this point the ix'th digit of x should be zero */ - - - /* propagate carries upwards as required*/ - while (u != 0u) { - *tmpx += u; - u = *tmpx >> MP_DIGIT_BIT; - *tmpx++ &= MP_MASK; - } + /* propagate carries upwards as required*/ + while (u != 0u) { + x->dp[ix + iy] += u; + u = x->dp[ix + iy] >> MP_DIGIT_BIT; + x->dp[ix + iy] &= MP_MASK; + ++iy; } } diff --git a/mp_mul.c b/mp_mul.c index 9c8f8ae..4103535 100644 --- a/mp_mul.c +++ b/mp_mul.c @@ -7,31 +7,31 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c) { mp_err err; - int min_len = MP_MIN(a->used, b->used), - max_len = MP_MAX(a->used, b->used), + int min = MP_MIN(a->used, b->used), + max = MP_MAX(a->used, b->used), digs = a->used + b->used + 1; mp_sign neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; - if (MP_HAS(S_MP_BALANCE_MUL) && + if (MP_HAS(S_MP_MUL_BALANCE) && /* Check sizes. The smaller one needs to be larger than the Karatsuba cut-off. - * The bigger one needs to be at least about one MP_KARATSUBA_MUL_CUTOFF bigger + * The bigger one needs to be at least about one MP_MUL_KARATSUBA_CUTOFF bigger * to make some sense, but it depends on architecture, OS, position of the * stars... so YMMV. - * Using it to cut the input into slices small enough for s_mp_mul_digs_fast + * Using it to cut the input into slices small enough for s_mp_mul_comba * was actually slower on the author's machine, but YMMV. */ - (min_len >= MP_KARATSUBA_MUL_CUTOFF) && - ((max_len / 2) >= MP_KARATSUBA_MUL_CUTOFF) && + (min >= MP_MUL_KARATSUBA_CUTOFF) && + ((max / 2) >= MP_MUL_KARATSUBA_CUTOFF) && /* Not much effect was observed below a ratio of 1:2, but again: YMMV. */ - (max_len >= (2 * min_len))) { - err = s_mp_balance_mul(a,b,c); - } else if (MP_HAS(S_MP_TOOM_MUL) && - (min_len >= MP_TOOM_MUL_CUTOFF)) { - err = s_mp_toom_mul(a, b, c); - } else if (MP_HAS(S_MP_KARATSUBA_MUL) && - (min_len >= MP_KARATSUBA_MUL_CUTOFF)) { - err = s_mp_karatsuba_mul(a, b, c); - } else if (MP_HAS(S_MP_MUL_DIGS_FAST) && + (max >= (2 * min))) { + err = s_mp_mul_balance(a,b,c); + } else if (MP_HAS(S_MP_MUL_TOOM) && + (min >= MP_MUL_TOOM_CUTOFF)) { + err = s_mp_mul_toom(a, b, c); + } else if (MP_HAS(S_MP_MUL_KARATSUBA) && + (min >= MP_MUL_KARATSUBA_CUTOFF)) { + err = s_mp_mul_karatsuba(a, b, c); + } else if (MP_HAS(S_MP_MUL_COMBA) && /* can we use the fast multiplier? * * The fast multiplier can be used if the output will @@ -39,10 +39,10 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c) * digits won't affect carry propagation */ (digs < MP_WARRAY) && - (min_len <= MP_MAXFAST)) { - err = s_mp_mul_digs_fast(a, b, c, digs); - } else if (MP_HAS(S_MP_MUL_DIGS)) { - err = s_mp_mul_digs(a, b, c, digs); + (min <= MP_MAXFAST)) { + err = s_mp_mul_comba(a, b, c, digs); + } else if (MP_HAS(S_MP_MUL)) { + err = s_mp_mul(a, b, c, digs); } else { err = MP_VAL; } diff --git a/mp_mul_2.c b/mp_mul_2.c index cd5589d..7d7084b 100644 --- a/mp_mul_2.c +++ b/mp_mul_2.c @@ -6,58 +6,47 @@ /* b = a*2 */ mp_err mp_mul_2(const mp_int *a, mp_int *b) { - int x, oldused; mp_err err; + int x, oldused; + mp_digit r; /* grow to accomodate result */ - if (b->alloc < (a->used + 1)) { - if ((err = mp_grow(b, a->used + 1)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(b, a->used + 1)) != MP_OKAY) { + return err; } oldused = b->used; b->used = a->used; - { - mp_digit r, rr, *tmpa, *tmpb; + /* carry */ + r = 0; + for (x = 0; x < a->used; x++) { - /* alias for source */ - tmpa = a->dp; - - /* alias for dest */ - tmpb = b->dp; - - /* carry */ - r = 0; - for (x = 0; x < a->used; x++) { - - /* get what will be the *next* carry bit from the - * MSB of the current digit - */ - rr = *tmpa >> (mp_digit)(MP_DIGIT_BIT - 1); - - /* now shift up this digit, add in the carry [from the previous] */ - *tmpb++ = ((*tmpa++ << 1uL) | r) & MP_MASK; - - /* copy the carry that would be from the source - * digit into the next iteration - */ - r = rr; - } - - /* new leading digit? */ - if (r != 0u) { - /* add a MSB which is always 1 at this point */ - *tmpb = 1; - ++(b->used); - } - - /* now zero any excess digits on the destination - * that we didn't write to + /* get what will be the *next* carry bit from the + * MSB of the current digit */ - MP_ZERO_DIGITS(b->dp + b->used, oldused - b->used); + mp_digit rr = a->dp[x] >> (mp_digit)(MP_DIGIT_BIT - 1); + + /* now shift up this digit, add in the carry [from the previous] */ + b->dp[x] = ((a->dp[x] << 1uL) | r) & MP_MASK; + + /* copy the carry that would be from the source + * digit into the next iteration + */ + r = rr; } + + /* new leading digit? */ + if (r != 0u) { + /* add a MSB which is always 1 at this point */ + b->dp[b->used++] = 1; + } + + /* now zero any excess digits on the destination + * that we didn't write to + */ + s_mp_zero_digs(b->dp + b->used, oldused - b->used); + b->sign = a->sign; return MP_OKAY; } diff --git a/mp_mul_2d.c b/mp_mul_2d.c index 1ba53a0..e458137 100644 --- a/mp_mul_2d.c +++ b/mp_mul_2d.c @@ -6,20 +6,18 @@ /* shift left by a certain bit count */ mp_err mp_mul_2d(const mp_int *a, int b, mp_int *c) { - mp_digit d; - mp_err err; + mp_err err; - /* copy */ - if (a != c) { - if ((err = mp_copy(a, c)) != MP_OKAY) { - return err; - } + if (b < 0) { + return MP_VAL; } - if (c->alloc < (c->used + (b / MP_DIGIT_BIT) + 1)) { - if ((err = mp_grow(c, c->used + (b / MP_DIGIT_BIT) + 1)) != MP_OKAY) { - return err; - } + if ((err = mp_copy(a, c)) != MP_OKAY) { + return err; + } + + if ((err = mp_grow(c, c->used + (b / MP_DIGIT_BIT) + 1)) != MP_OKAY) { + return err; } /* shift by as many digits in the bit count */ @@ -30,29 +28,25 @@ mp_err mp_mul_2d(const mp_int *a, int b, mp_int *c) } /* shift any bit count < MP_DIGIT_BIT */ - d = (mp_digit)(b % MP_DIGIT_BIT); - if (d != 0u) { - mp_digit *tmpc, shift, mask, r, rr; + b %= MP_DIGIT_BIT; + if (b != 0u) { + mp_digit shift, mask, r; int x; /* bitmask for carries */ - mask = ((mp_digit)1 << d) - (mp_digit)1; + mask = ((mp_digit)1 << b) - (mp_digit)1; /* shift for msbs */ - shift = (mp_digit)MP_DIGIT_BIT - d; - - /* alias */ - tmpc = c->dp; + shift = (mp_digit)(MP_DIGIT_BIT - b); /* carry */ r = 0; for (x = 0; x < c->used; x++) { /* get the higher bits of the current word */ - rr = (*tmpc >> shift) & mask; + mp_digit rr = (c->dp[x] >> shift) & mask; /* shift the current word and OR in the carry */ - *tmpc = ((*tmpc << d) | r) & MP_MASK; - ++tmpc; + c->dp[x] = ((c->dp[x] << b) | r) & MP_MASK; /* set the carry to the carry bits of the current word */ r = rr; diff --git a/mp_mul_d.c b/mp_mul_d.c index 399dc7b..30d6c93 100644 --- a/mp_mul_d.c +++ b/mp_mul_d.c @@ -6,54 +6,45 @@ /* multiply by a digit */ mp_err mp_mul_d(const mp_int *a, mp_digit b, mp_int *c) { - mp_digit u, *tmpa, *tmpc; - mp_word r; + mp_digit u; mp_err err; - int ix, olduse; + int ix, oldused; /* make sure c is big enough to hold a*b */ - if (c->alloc < (a->used + 1)) { - if ((err = mp_grow(c, a->used + 1)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(c, a->used + 1)) != MP_OKAY) { + return err; } /* get the original destinations used count */ - olduse = c->used; + oldused = c->used; /* set the sign */ c->sign = a->sign; - /* alias for a->dp [source] */ - tmpa = a->dp; - - /* alias for c->dp [dest] */ - tmpc = c->dp; - /* zero carry */ u = 0; /* compute columns */ for (ix = 0; ix < a->used; ix++) { /* compute product and carry sum for this term */ - r = (mp_word)u + ((mp_word)*tmpa++ * (mp_word)b); + mp_word r = (mp_word)u + ((mp_word)a->dp[ix] * (mp_word)b); /* mask off higher bits to get a single digit */ - *tmpc++ = (mp_digit)(r & (mp_word)MP_MASK); + c->dp[ix] = (mp_digit)(r & (mp_word)MP_MASK); /* send carry into next iteration */ u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); } /* store final carry [if any] and increment ix offset */ - *tmpc++ = u; - ++ix; - - /* now zero digits above the top */ - MP_ZERO_DIGITS(tmpc, olduse - ix); + c->dp[ix] = u; /* set used count */ c->used = a->used + 1; + + /* now zero digits above the top */ + s_mp_zero_digs(c->dp + c->used, oldused - c->used); + mp_clamp(c); return MP_OKAY; diff --git a/mp_neg.c b/mp_neg.c index f54ef3e..bfb6eb9 100644 --- a/mp_neg.c +++ b/mp_neg.c @@ -6,11 +6,9 @@ /* b = -a */ mp_err mp_neg(const mp_int *a, mp_int *b) { - if (a != b) { - mp_err err; - if ((err = mp_copy(a, b)) != MP_OKAY) { - return err; - } + mp_err err; + if ((err = mp_copy(a, b)) != MP_OKAY) { + return err; } b->sign = mp_iszero(b) || b->sign == MP_NEG ? MP_ZPOS : MP_NEG; diff --git a/mp_or.c b/mp_or.c index 7fa1375..5cf5255 100644 --- a/mp_or.c +++ b/mp_or.c @@ -11,10 +11,8 @@ mp_err mp_or(const mp_int *a, const mp_int *b, mp_int *c) mp_digit ac = 1, bc = 1, cc = 1; mp_sign csign = ((a->sign == MP_NEG) || (b->sign == MP_NEG)) ? MP_NEG : MP_ZPOS; - if (c->alloc < used) { - if ((err = mp_grow(c, used)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(c, used)) != MP_OKAY) { + return err; } for (i = 0; i < used; i++) { diff --git a/mp_prime_fermat.c b/mp_prime_fermat.c index 50d2e5e..ac8116f 100644 --- a/mp_prime_fermat.c +++ b/mp_prime_fermat.c @@ -16,9 +16,6 @@ mp_err mp_prime_fermat(const mp_int *a, const mp_int *b, bool *result) mp_int t; mp_err err; - /* default to composite */ - *result = false; - /* ensure b > 1 */ if (mp_cmp_d(b, 1uL) != MP_GT) { return MP_VAL; @@ -31,16 +28,13 @@ mp_err mp_prime_fermat(const mp_int *a, const mp_int *b, bool *result) /* compute t = b**a mod a */ if ((err = mp_exptmod(b, a, a, &t)) != MP_OKAY) { - goto LBL_T; + goto LBL_ERR; } /* is it equal to b? */ - if (mp_cmp(&t, b) == MP_EQ) { - *result = true; - } + *result = mp_cmp(&t, b) == MP_EQ; - err = MP_OKAY; -LBL_T: +LBL_ERR: mp_clear(&t); return err; } diff --git a/mp_prime_frobenius_underwood.c b/mp_prime_frobenius_underwood.c index 543b8b4..62d3476 100644 --- a/mp_prime_frobenius_underwood.c +++ b/mp_prime_frobenius_underwood.c @@ -23,17 +23,16 @@ mp_err mp_prime_frobenius_underwood(const mp_int *N, bool *result) { mp_int T1z, T2z, Np1z, sz, tz; - - int a, ap2, length, i, j; + int a, ap2, i; mp_err err; - *result = false; - if ((err = mp_init_multi(&T1z, &T2z, &Np1z, &sz, &tz, NULL)) != MP_OKAY) { return err; } for (a = 0; a < LTM_FROBENIUS_UNDERWOOD_A; a++) { + int j; + /* TODO: That's ugly! No, really, it is! */ if ((a==2) || (a==4) || (a==7) || (a==8) || (a==10) || (a==14) || (a==18) || (a==23) || (a==26) || (a==28)) { @@ -42,7 +41,7 @@ mp_err mp_prime_frobenius_underwood(const mp_int *N, bool *result) mp_set_i32(&T1z, (int32_t)((a * a) - 4)); - if ((err = mp_kronecker(&T1z, N, &j)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_kronecker(&T1z, N, &j)) != MP_OKAY) goto LBL_END; if (j == -1) { break; @@ -50,73 +49,76 @@ mp_err mp_prime_frobenius_underwood(const mp_int *N, bool *result) if (j == 0) { /* composite */ - goto LBL_FU_ERR; + *result = false; + goto LBL_END; } } /* Tell it a composite and set return value accordingly */ if (a >= LTM_FROBENIUS_UNDERWOOD_A) { err = MP_ITER; - goto LBL_FU_ERR; + goto LBL_END; } /* Composite if N and (a+4)*(2*a+5) are not coprime */ mp_set_u32(&T1z, (uint32_t)((a+4)*((2*a)+5))); - if ((err = mp_gcd(N, &T1z, &T1z)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_gcd(N, &T1z, &T1z)) != MP_OKAY) goto LBL_END; - if (!((T1z.used == 1) && (T1z.dp[0] == 1u))) goto LBL_FU_ERR; + if (!((T1z.used == 1) && (T1z.dp[0] == 1u))) { + /* composite */ + *result = false; + goto LBL_END; + } ap2 = a + 2; - if ((err = mp_add_d(N, 1uL, &Np1z)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_add_d(N, 1uL, &Np1z)) != MP_OKAY) goto LBL_END; mp_set(&sz, 1uL); mp_set(&tz, 2uL); - length = mp_count_bits(&Np1z); - for (i = length - 2; i >= 0; i--) { + for (i = mp_count_bits(&Np1z) - 2; i >= 0; i--) { /* * temp = (sz*(a*sz+2*tz))%N; * tz = ((tz-sz)*(tz+sz))%N; * sz = temp; */ - if ((err = mp_mul_2(&tz, &T2z)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_mul_2(&tz, &T2z)) != MP_OKAY) goto LBL_END; /* a = 0 at about 50% of the cases (non-square and odd input) */ if (a != 0) { - if ((err = mp_mul_d(&sz, (mp_digit)a, &T1z)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_add(&T1z, &T2z, &T2z)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_mul_d(&sz, (mp_digit)a, &T1z)) != MP_OKAY) goto LBL_END; + if ((err = mp_add(&T1z, &T2z, &T2z)) != MP_OKAY) goto LBL_END; } - if ((err = mp_mul(&T2z, &sz, &T1z)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_sub(&tz, &sz, &T2z)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_add(&sz, &tz, &sz)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_mul(&sz, &T2z, &tz)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_mod(&tz, N, &tz)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_mod(&T1z, N, &sz)) != MP_OKAY) goto LBL_FU_ERR; - if (s_mp_get_bit(&Np1z, (unsigned int)i)) { + if ((err = mp_mul(&T2z, &sz, &T1z)) != MP_OKAY) goto LBL_END; + if ((err = mp_sub(&tz, &sz, &T2z)) != MP_OKAY) goto LBL_END; + if ((err = mp_add(&sz, &tz, &sz)) != MP_OKAY) goto LBL_END; + if ((err = mp_mul(&sz, &T2z, &tz)) != MP_OKAY) goto LBL_END; + if ((err = mp_mod(&tz, N, &tz)) != MP_OKAY) goto LBL_END; + if ((err = mp_mod(&T1z, N, &sz)) != MP_OKAY) goto LBL_END; + if (s_mp_get_bit(&Np1z, i)) { /* * temp = (a+2) * sz + tz * tz = 2 * tz - sz * sz = temp */ if (a == 0) { - if ((err = mp_mul_2(&sz, &T1z)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_mul_2(&sz, &T1z)) != MP_OKAY) goto LBL_END; } else { - if ((err = mp_mul_d(&sz, (mp_digit)ap2, &T1z)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_mul_d(&sz, (mp_digit)ap2, &T1z)) != MP_OKAY) goto LBL_END; } - if ((err = mp_add(&T1z, &tz, &T1z)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_mul_2(&tz, &T2z)) != MP_OKAY) goto LBL_FU_ERR; - if ((err = mp_sub(&T2z, &sz, &tz)) != MP_OKAY) goto LBL_FU_ERR; + if ((err = mp_add(&T1z, &tz, &T1z)) != MP_OKAY) goto LBL_END; + if ((err = mp_mul_2(&tz, &T2z)) != MP_OKAY) goto LBL_END; + if ((err = mp_sub(&T2z, &sz, &tz)) != MP_OKAY) goto LBL_END; mp_exch(&sz, &T1z); } } mp_set_u32(&T1z, (uint32_t)((2 * a) + 5)); - if ((err = mp_mod(&T1z, N, &T1z)) != MP_OKAY) goto LBL_FU_ERR; - if (mp_iszero(&sz) && (mp_cmp(&tz, &T1z) == MP_EQ)) { - *result = true; - } + if ((err = mp_mod(&T1z, N, &T1z)) != MP_OKAY) goto LBL_END; -LBL_FU_ERR: + *result = mp_iszero(&sz) && (mp_cmp(&tz, &T1z) == MP_EQ); + +LBL_END: mp_clear_multi(&tz, &sz, &Np1z, &T2z, &T1z, NULL); return err; } diff --git a/mp_prime_is_prime.c b/mp_prime_is_prime.c index d0eca2c..7d73864 100644 --- a/mp_prime_is_prime.c +++ b/mp_prime_is_prime.c @@ -13,14 +13,12 @@ static unsigned int s_floor_ilog2(int value) return r; } - mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) { mp_int b; - int ix, p_max = 0, size_a, len; - bool res; + int ix; + bool res; mp_err err; - unsigned int fips_rand, mask; /* default to no */ *result = false; @@ -133,6 +131,8 @@ mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) TODO: can be made a bit finer grained but comparing is not free. */ if (t < 0) { + int p_max = 0; + /* Sorenson, Jonathan; Webster, Jonathan (2015). "Strong Pseudoprimes to Twelve Prime Bases". @@ -174,6 +174,9 @@ mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) See Fips 186.4 p. 126ff */ else if (t > 0) { + unsigned int mask; + int size_a; + /* * The mp_digit's have a defined bit-size but the size of the * array a.dp is a simple 'int' and this library can not assume full @@ -219,6 +222,9 @@ mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result) need to be prime. */ for (ix = 0; ix < t; ix++) { + unsigned int fips_rand; + int len; + /* mp_rand() guarantees the first digit to be non-zero */ if ((err = mp_rand(&b, 1)) != MP_OKAY) { goto LBL_B; diff --git a/mp_prime_miller_rabin.c b/mp_prime_miller_rabin.c index a3af8bc..4c23a9f 100644 --- a/mp_prime_miller_rabin.c +++ b/mp_prime_miller_rabin.c @@ -16,9 +16,6 @@ mp_err mp_prime_miller_rabin(const mp_int *a, const mp_int *b, bool *result) mp_err err; int s, j; - /* default */ - *result = false; - /* ensure b > 1 */ if (mp_cmp_d(b, 1uL) != MP_GT) { return MP_VAL; @@ -29,12 +26,12 @@ mp_err mp_prime_miller_rabin(const mp_int *a, const mp_int *b, bool *result) return err; } if ((err = mp_sub_d(&n1, 1uL, &n1)) != MP_OKAY) { - goto LBL_N1; + goto LBL_ERR1; } /* set 2**s * r = n1 */ if ((err = mp_init_copy(&r, &n1)) != MP_OKAY) { - goto LBL_N1; + goto LBL_ERR1; } /* count the number of least significant bits @@ -44,15 +41,15 @@ mp_err mp_prime_miller_rabin(const mp_int *a, const mp_int *b, bool *result) /* now divide n - 1 by 2**s */ if ((err = mp_div_2d(&r, s, &r, NULL)) != MP_OKAY) { - goto LBL_R; + goto LBL_ERR2; } /* compute y = b**r mod a */ if ((err = mp_init(&y)) != MP_OKAY) { - goto LBL_R; + goto LBL_ERR2; } if ((err = mp_exptmod(b, &r, a, &y)) != MP_OKAY) { - goto LBL_Y; + goto LBL_END; } /* if y != 1 and y != n1 do */ @@ -61,12 +58,13 @@ mp_err mp_prime_miller_rabin(const mp_int *a, const mp_int *b, bool *result) /* while j <= s-1 and y != n1 */ while ((j <= (s - 1)) && (mp_cmp(&y, &n1) != MP_EQ)) { if ((err = mp_sqrmod(&y, a, &y)) != MP_OKAY) { - goto LBL_Y; + goto LBL_END; } /* if y == 1 then composite */ if (mp_cmp_d(&y, 1uL) == MP_EQ) { - goto LBL_Y; + *result = false; + goto LBL_END; } ++j; @@ -74,17 +72,19 @@ mp_err mp_prime_miller_rabin(const mp_int *a, const mp_int *b, bool *result) /* if y != n1 then composite */ if (mp_cmp(&y, &n1) != MP_EQ) { - goto LBL_Y; + *result = false; + goto LBL_END; } } /* probably prime now */ *result = true; -LBL_Y: + +LBL_END: mp_clear(&y); -LBL_R: +LBL_ERR2: mp_clear(&r); -LBL_N1: +LBL_ERR1: mp_clear(&n1); return err; } diff --git a/mp_prime_next_prime.c b/mp_prime_next_prime.c index 40c94a4..6faa08d 100644 --- a/mp_prime_next_prime.c +++ b/mp_prime_next_prime.c @@ -10,11 +10,10 @@ */ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) { - int x, y; - mp_ord cmp; + int x; mp_err err; bool res = false; - mp_digit res_tab[MP_PRIME_TAB_SIZE], step, kstep; + mp_digit res_tab[MP_PRIME_TAB_SIZE], kstep; mp_int b; /* force positive */ @@ -24,7 +23,7 @@ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) if (mp_cmp_d(a, s_mp_prime_tab[MP_PRIME_TAB_SIZE-1]) == MP_LT) { /* find which prime it is bigger than "a" */ for (x = 0; x < MP_PRIME_TAB_SIZE; x++) { - cmp = mp_cmp_d(a, s_mp_prime_tab[x]); + mp_ord cmp = mp_cmp_d(a, s_mp_prime_tab[x]); if (cmp == MP_EQ) { continue; } @@ -42,11 +41,7 @@ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) } /* generate a prime congruent to 3 mod 4 or 1/3 mod 4? */ - if (bbs_style) { - kstep = 4; - } else { - kstep = 2; - } + kstep = bbs_style ? 4 : 2; /* at this point we will use a combination of a sieve and Miller-Rabin */ @@ -79,11 +74,12 @@ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) } for (;;) { + mp_digit step = 0; + bool y; /* skip to the next non-trivially divisible candidate */ - step = 0; do { - /* y == 1 if any residue was zero [e.g. cannot be prime] */ - y = 0; + /* y == true if any residue was zero [e.g. cannot be prime] */ + y = false; /* increase step to next candidate */ step += kstep; @@ -100,10 +96,10 @@ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) /* set flag if zero */ if (res_tab[x] == 0u) { - y = 1; + y = true; } } - } while ((y == 1) && (step < (((mp_digit)1 << MP_DIGIT_BIT) - kstep))); + } while (y && (step < (((mp_digit)1 << MP_DIGIT_BIT) - kstep))); /* add the step */ if ((err = mp_add_d(a, step, a)) != MP_OKAY) { @@ -111,7 +107,7 @@ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) } /* if didn't pass sieve and step == MP_MAX then skip test */ - if ((y == 1) && (step >= (((mp_digit)1 << MP_DIGIT_BIT) - kstep))) { + if (y && (step >= (((mp_digit)1 << MP_DIGIT_BIT) - kstep))) { continue; } @@ -123,7 +119,6 @@ mp_err mp_prime_next_prime(mp_int *a, int t, bool bbs_style) } } - err = MP_OKAY; LBL_ERR: mp_clear(&b); return err; diff --git a/mp_prime_rand.c b/mp_prime_rand.c index 8476b4f..c5cebbd 100644 --- a/mp_prime_rand.c +++ b/mp_prime_rand.c @@ -116,7 +116,7 @@ mp_err mp_prime_rand(mp_int *a, int t, int size, int flags) err = MP_OKAY; LBL_ERR: - MP_FREE_BUFFER(tmp, (size_t)bsize); + MP_FREE_BUF(tmp, (size_t)bsize); return err; } diff --git a/mp_prime_strong_lucas_selfridge.c b/mp_prime_strong_lucas_selfridge.c index df5de96..6262e07 100644 --- a/mp_prime_strong_lucas_selfridge.c +++ b/mp_prime_strong_lucas_selfridge.c @@ -192,7 +192,7 @@ mp_err mp_prime_strong_lucas_selfridge(const mp_int *a, bool *result) if ((err = mp_mod(&Qmz, a, &Qmz)) != MP_OKAY) goto LBL_LS_ERR; if ((err = mp_mul_2(&Qmz, &Q2mz)) != MP_OKAY) goto LBL_LS_ERR; - if (s_mp_get_bit(&Dz, (unsigned int)u)) { + if (s_mp_get_bit(&Dz, u)) { /* Formulas for addition of indices (carried out mod N); * * U_(m+n) = (U_m*V_n + U_n*V_m)/2 diff --git a/mp_reduce.c b/mp_reduce.c index 5226fe7..b6fae55 100644 --- a/mp_reduce.c +++ b/mp_reduce.c @@ -26,12 +26,12 @@ mp_err mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu) if ((err = mp_mul(&q, mu, &q)) != MP_OKAY) { goto LBL_ERR; } - } else if (MP_HAS(S_MP_MUL_HIGH_DIGS)) { - if ((err = s_mp_mul_high_digs(&q, mu, &q, um)) != MP_OKAY) { + } else if (MP_HAS(S_MP_MUL_HIGH)) { + if ((err = s_mp_mul_high(&q, mu, &q, um)) != MP_OKAY) { goto LBL_ERR; } - } else if (MP_HAS(S_MP_MUL_HIGH_DIGS_FAST)) { - if ((err = s_mp_mul_high_digs_fast(&q, mu, &q, um)) != MP_OKAY) { + } else if (MP_HAS(S_MP_MUL_HIGH_COMBA)) { + if ((err = s_mp_mul_high_comba(&q, mu, &q, um)) != MP_OKAY) { goto LBL_ERR; } } else { @@ -48,7 +48,7 @@ mp_err mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu) } /* q = q * m mod b**(k+1), quick (no division) */ - if ((err = s_mp_mul_digs(&q, m, &q, um + 1)) != MP_OKAY) { + if ((err = s_mp_mul(&q, m, &q, um + 1)) != MP_OKAY) { goto LBL_ERR; } diff --git a/mp_reduce_2k.c b/mp_reduce_2k.c index 5d3c7f9..e635f5b 100644 --- a/mp_reduce_2k.c +++ b/mp_reduce_2k.c @@ -8,36 +8,37 @@ mp_err mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d) { mp_int q; mp_err err; - int p; + int p; if ((err = mp_init(&q)) != MP_OKAY) { return err; } p = mp_count_bits(n); -top: - /* q = a/2**p, a = a mod 2**p */ - if ((err = mp_div_2d(a, p, &q, a)) != MP_OKAY) { - goto LBL_ERR; - } - - if (d != 1u) { - /* q = q * d */ - if ((err = mp_mul_d(&q, d, &q)) != MP_OKAY) { + for (;;) { + /* q = a/2**p, a = a mod 2**p */ + if ((err = mp_div_2d(a, p, &q, a)) != MP_OKAY) { goto LBL_ERR; } - } - /* a = a + q */ - if ((err = s_mp_add(a, &q, a)) != MP_OKAY) { - goto LBL_ERR; - } + if (d != 1u) { + /* q = q * d */ + if ((err = mp_mul_d(&q, d, &q)) != MP_OKAY) { + goto LBL_ERR; + } + } - if (mp_cmp_mag(a, n) != MP_LT) { + /* a = a + q */ + if ((err = s_mp_add(a, &q, a)) != MP_OKAY) { + goto LBL_ERR; + } + + if (mp_cmp_mag(a, n) == MP_LT) { + break; + } if ((err = s_mp_sub(a, n, a)) != MP_OKAY) { goto LBL_ERR; } - goto top; } LBL_ERR: diff --git a/mp_reduce_2k_l.c b/mp_reduce_2k_l.c index 6328cbc..31d9a18 100644 --- a/mp_reduce_2k_l.c +++ b/mp_reduce_2k_l.c @@ -18,27 +18,30 @@ mp_err mp_reduce_2k_l(mp_int *a, const mp_int *n, const mp_int *d) } p = mp_count_bits(n); -top: - /* q = a/2**p, a = a mod 2**p */ - if ((err = mp_div_2d(a, p, &q, a)) != MP_OKAY) { - goto LBL_ERR; - } - /* q = q * d */ - if ((err = mp_mul(&q, d, &q)) != MP_OKAY) { - goto LBL_ERR; - } + for (;;) { + /* q = a/2**p, a = a mod 2**p */ + if ((err = mp_div_2d(a, p, &q, a)) != MP_OKAY) { + goto LBL_ERR; + } - /* a = a + q */ - if ((err = s_mp_add(a, &q, a)) != MP_OKAY) { - goto LBL_ERR; - } + /* q = q * d */ + if ((err = mp_mul(&q, d, &q)) != MP_OKAY) { + goto LBL_ERR; + } - if (mp_cmp_mag(a, n) != MP_LT) { + /* a = a + q */ + if ((err = s_mp_add(a, &q, a)) != MP_OKAY) { + goto LBL_ERR; + } + + if (mp_cmp_mag(a, n) == MP_LT) { + break; + } if ((err = s_mp_sub(a, n, a)) != MP_OKAY) { goto LBL_ERR; } - goto top; + } LBL_ERR: diff --git a/mp_reduce_2k_setup.c b/mp_reduce_2k_setup.c index 0f3fd29..51f8841 100644 --- a/mp_reduce_2k_setup.c +++ b/mp_reduce_2k_setup.c @@ -8,25 +8,23 @@ mp_err mp_reduce_2k_setup(const mp_int *a, mp_digit *d) { mp_err err; mp_int tmp; - int p; if ((err = mp_init(&tmp)) != MP_OKAY) { return err; } - p = mp_count_bits(a); - if ((err = mp_2expt(&tmp, p)) != MP_OKAY) { - mp_clear(&tmp); - return err; + if ((err = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { + goto LBL_ERR; } if ((err = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) { - mp_clear(&tmp); - return err; + goto LBL_ERR; } *d = tmp.dp[0]; + +LBL_ERR: mp_clear(&tmp); - return MP_OKAY; + return err; } #endif diff --git a/mp_rshd.c b/mp_rshd.c index d798907..3f0a941 100644 --- a/mp_rshd.c +++ b/mp_rshd.c @@ -35,7 +35,7 @@ void mp_rshd(mp_int *a, int b) } /* zero the top digits */ - MP_ZERO_DIGITS(a->dp + a->used - b, b); + s_mp_zero_digs(a->dp + a->used - b, b); /* remove excess digits */ a->used -= b; diff --git a/mp_set.c b/mp_set.c index 3ee5f81..bc0c4da 100644 --- a/mp_set.c +++ b/mp_set.c @@ -10,6 +10,6 @@ void mp_set(mp_int *a, mp_digit b) a->dp[0] = b & MP_MASK; a->sign = MP_ZPOS; a->used = (a->dp[0] != 0u) ? 1 : 0; - MP_ZERO_DIGITS(a->dp + a->used, oldused - a->used); + s_mp_zero_digs(a->dp + a->used, oldused - a->used); } #endif diff --git a/mp_sqr.c b/mp_sqr.c index e38130b..b0da0ed 100644 --- a/mp_sqr.c +++ b/mp_sqr.c @@ -7,16 +7,16 @@ mp_err mp_sqr(const mp_int *a, mp_int *b) { mp_err err; - if (MP_HAS(S_MP_TOOM_SQR) && /* use Toom-Cook? */ - (a->used >= MP_TOOM_SQR_CUTOFF)) { - err = s_mp_toom_sqr(a, b); - } else if (MP_HAS(S_MP_KARATSUBA_SQR) && /* Karatsuba? */ - (a->used >= MP_KARATSUBA_SQR_CUTOFF)) { - err = s_mp_karatsuba_sqr(a, b); - } else if (MP_HAS(S_MP_SQR_FAST) && /* can we use the fast comba multiplier? */ + if (MP_HAS(S_MP_SQR_TOOM) && /* use Toom-Cook? */ + (a->used >= MP_SQR_TOOM_CUTOFF)) { + err = s_mp_sqr_toom(a, b); + } else if (MP_HAS(S_MP_SQR_KARATSUBA) && /* Karatsuba? */ + (a->used >= MP_SQR_KARATSUBA_CUTOFF)) { + err = s_mp_sqr_karatsuba(a, b); + } else if (MP_HAS(S_MP_SQR_COMBA) && /* can we use the fast comba multiplier? */ (((a->used * 2) + 1) < MP_WARRAY) && (a->used < (MP_MAXFAST / 2))) { - err = s_mp_sqr_fast(a, b); + err = s_mp_sqr_comba(a, b); } else if (MP_HAS(S_MP_SQR)) { err = s_mp_sqr(a, b); } else { diff --git a/mp_sub_d.c b/mp_sub_d.c index 96a747c..e80df3d 100644 --- a/mp_sub_d.c +++ b/mp_sub_d.c @@ -6,9 +6,8 @@ /* single digit subtraction */ mp_err mp_sub_d(const mp_int *a, mp_digit b, mp_int *c) { - mp_digit *tmpa, *tmpc; - mp_err err; - int ix, oldused; + mp_err err; + int oldused; /* fast path for a == c */ if (a == c) { @@ -25,10 +24,8 @@ mp_err mp_sub_d(const mp_int *a, mp_digit b, mp_int *c) } /* grow c as required */ - if (c->alloc < (a->used + 1)) { - if ((err = mp_grow(c, a->used + 1)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(c, a->used + 1)) != MP_OKAY) { + return err; } /* if a is negative just do an unsigned @@ -46,24 +43,17 @@ mp_err mp_sub_d(const mp_int *a, mp_digit b, mp_int *c) return err; } - /* setup regs */ oldused = c->used; - tmpa = a->dp; - tmpc = c->dp; /* if a <= b simply fix the single digit */ if (((a->used == 1) && (a->dp[0] <= b)) || (a->used == 0)) { - if (a->used == 1) { - *tmpc++ = b - *tmpa; - } else { - *tmpc++ = b; - } - ix = 1; + c->dp[0] = (a->used == 1) ? b - a->dp[0] : b; /* negative/1digit */ c->sign = MP_NEG; c->used = 1; } else { + int i; mp_digit mu = b; /* positive/size */ @@ -71,15 +61,15 @@ mp_err mp_sub_d(const mp_int *a, mp_digit b, mp_int *c) c->used = a->used; /* subtract digits, mu is carry */ - for (ix = 0; ix < a->used; ix++) { - *tmpc = *tmpa++ - mu; - mu = *tmpc >> (MP_SIZEOF_BITS(mp_digit) - 1u); - *tmpc++ &= MP_MASK; + for (i = 0; i < a->used; i++) { + c->dp[i] = a->dp[i] - mu; + mu = c->dp[i] >> (MP_SIZEOF_BITS(mp_digit) - 1u); + c->dp[i] &= MP_MASK; } } /* zero excess digits */ - MP_ZERO_DIGITS(tmpc, oldused - ix); + s_mp_zero_digs(c->dp + c->used, oldused - c->used); mp_clamp(c); return MP_OKAY; diff --git a/mp_xor.c b/mp_xor.c index ca2c2f1..2fe8618 100644 --- a/mp_xor.c +++ b/mp_xor.c @@ -11,10 +11,8 @@ mp_err mp_xor(const mp_int *a, const mp_int *b, mp_int *c) mp_digit ac = 1, bc = 1, cc = 1; mp_sign csign = (a->sign != b->sign) ? MP_NEG : MP_ZPOS; - if (c->alloc < used) { - if ((err = mp_grow(c, used)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(c, used)) != MP_OKAY) { + return err; } for (i = 0; i < used; i++) { diff --git a/mp_zero.c b/mp_zero.c index b7dddd2..48b60e4 100644 --- a/mp_zero.c +++ b/mp_zero.c @@ -7,7 +7,7 @@ void mp_zero(mp_int *a) { a->sign = MP_ZPOS; - MP_ZERO_DIGITS(a->dp, a->used); + s_mp_zero_digs(a->dp, a->used); a->used = 0; } #endif diff --git a/s_mp_add.c b/s_mp_add.c index 9220719..2bda2fe 100644 --- a/s_mp_add.c +++ b/s_mp_add.c @@ -6,85 +6,64 @@ /* low level addition, based on HAC pp.594, Algorithm 14.7 */ mp_err s_mp_add(const mp_int *a, const mp_int *b, mp_int *c) { - const mp_int *x; + int oldused, min, max, i; + mp_digit u; mp_err err; - int olduse, min, max; /* find sizes, we let |a| <= |b| which means we have to sort * them. "x" will point to the input with the most digits */ - if (a->used > b->used) { - min = b->used; - max = a->used; - x = a; - } else { - min = a->used; - max = b->used; - x = b; + if (a->used < b->used) { + MP_EXCH(const mp_int *, a, b); } + min = b->used; + max = a->used; + /* init result */ - if (c->alloc < (max + 1)) { - if ((err = mp_grow(c, max + 1)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(c, max + 1)) != MP_OKAY) { + return err; } /* get old used digit count and set new one */ - olduse = c->used; + oldused = c->used; c->used = max + 1; - { - mp_digit u, *tmpa, *tmpb, *tmpc; - int i; + /* zero the carry */ + u = 0; + for (i = 0; i < min; i++) { + /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ + c->dp[i] = a->dp[i] + b->dp[i] + u; - /* alias for digit pointers */ + /* U = carry bit of T[i] */ + u = c->dp[i] >> (mp_digit)MP_DIGIT_BIT; - /* first input */ - tmpa = a->dp; + /* take away carry bit from T[i] */ + c->dp[i] &= MP_MASK; + } - /* second input */ - tmpb = b->dp; - - /* destination */ - tmpc = c->dp; - - /* zero the carry */ - u = 0; - for (i = 0; i < min; i++) { - /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ - *tmpc = *tmpa++ + *tmpb++ + u; + /* now copy higher words if any, that is in A+B + * if A or B has more digits add those in + */ + if (min != max) { + for (; i < max; i++) { + /* T[i] = A[i] + U */ + c->dp[i] = a->dp[i] + u; /* U = carry bit of T[i] */ - u = *tmpc >> (mp_digit)MP_DIGIT_BIT; + u = c->dp[i] >> (mp_digit)MP_DIGIT_BIT; /* take away carry bit from T[i] */ - *tmpc++ &= MP_MASK; + c->dp[i] &= MP_MASK; } - - /* now copy higher words if any, that is in A+B - * if A or B has more digits add those in - */ - if (min != max) { - for (; i < max; i++) { - /* T[i] = X[i] + U */ - *tmpc = x->dp[i] + u; - - /* U = carry bit of T[i] */ - u = *tmpc >> (mp_digit)MP_DIGIT_BIT; - - /* take away carry bit from T[i] */ - *tmpc++ &= MP_MASK; - } - } - - /* add carry */ - *tmpc++ = u; - - /* clear digits above oldused */ - MP_ZERO_DIGITS(tmpc, olduse - c->used); } + /* add carry */ + c->dp[i] = u; + + /* clear digits above oldused */ + s_mp_zero_digs(c->dp + c->used, oldused - c->used); + mp_clamp(c); return MP_OKAY; } diff --git a/s_mp_copy_digs.c b/s_mp_copy_digs.c new file mode 100644 index 0000000..4079c33 --- /dev/null +++ b/s_mp_copy_digs.c @@ -0,0 +1,23 @@ +#include "tommath_private.h" +#ifdef S_MP_COPY_DIGS_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +#ifdef MP_USE_MEMOPS +# include +#endif + +void s_mp_copy_digs(mp_digit *d, const mp_digit *s, int digits) +{ +#ifdef MP_USE_MEMOPS + if (digits > 0) { + memcpy(d, s, (size_t)digits * sizeof(mp_digit)); + } +#else + while (digits-- > 0) { + *d++ = *s++; + } +#endif +} + +#endif diff --git a/s_mp_div_recursive.c b/s_mp_div_recursive.c index 7007aef..2fbc312 100644 --- a/s_mp_div_recursive.c +++ b/s_mp_div_recursive.c @@ -20,7 +20,7 @@ static mp_err s_mp_recursion(const mp_int *a, const mp_int *b, mp_int *q, mp_int mp_int A1, A2, B1, B0, Q1, Q0, R1, R0, t; int m = a->used - b->used, k = m/2; - if (m < MP_KARATSUBA_MUL_CUTOFF) { + if (m < MP_MUL_KARATSUBA_CUTOFF) { return s_mp_div_school(a, b, q, r); } @@ -104,7 +104,7 @@ mp_err s_mp_div_recursive(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r Vid. section 2.3. */ - m = MP_KARATSUBA_MUL_CUTOFF; + m = MP_MUL_KARATSUBA_CUTOFF; while (m <= b->used) { m <<= 1; } diff --git a/s_mp_exptmod_fast.c b/s_mp_exptmod_fast.c index d581129..813eef2 100644 --- a/s_mp_exptmod_fast.c +++ b/s_mp_exptmod_fast.c @@ -80,10 +80,10 @@ mp_err s_mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_i } /* automatically pick the comba one if available (saves quite a few calls/ifs) */ - if (MP_HAS(S_MP_MONTGOMERY_REDUCE_FAST) && + if (MP_HAS(S_MP_MONTGOMERY_REDUCE_COMBA) && (((P->used * 2) + 1) < MP_WARRAY) && (P->used < MP_MAXFAST)) { - redux = s_mp_montgomery_reduce_fast; + redux = s_mp_montgomery_reduce_comba; } else if (MP_HAS(MP_MONTGOMERY_REDUCE)) { /* use slower baseline Montgomery method */ redux = mp_montgomery_reduce; diff --git a/s_mp_get_bit.c b/s_mp_get_bit.c index f077f61..a509bce 100644 --- a/s_mp_get_bit.c +++ b/s_mp_get_bit.c @@ -5,12 +5,12 @@ /* SPDX-License-Identifier: Unlicense */ /* Get bit at position b and return true if the bit is 1, false if it is 0 */ -bool s_mp_get_bit(const mp_int *a, unsigned int b) +bool s_mp_get_bit(const mp_int *a, int b) { mp_digit bit; - int limb = (int)(b / MP_DIGIT_BIT); + int limb = b / MP_DIGIT_BIT; - if (limb >= a->used) { + if (limb < 0 || limb >= a->used) { return false; } diff --git a/s_mp_invmod.c b/s_mp_invmod.c new file mode 100644 index 0000000..f3b3f43 --- /dev/null +++ b/s_mp_invmod.c @@ -0,0 +1,117 @@ +#include "tommath_private.h" +#ifdef S_MP_INVMOD_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +/* hac 14.61, pp608 */ +mp_err s_mp_invmod(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_int x, y, u, v, A, B, C, D; + mp_err err; + + /* b cannot be negative */ + if ((b->sign == MP_NEG) || mp_iszero(b)) { + return MP_VAL; + } + + /* init temps */ + if ((err = mp_init_multi(&x, &y, &u, &v, + &A, &B, &C, &D, NULL)) != MP_OKAY) { + return err; + } + + /* x = a, y = b */ + if ((err = mp_mod(a, b, &x)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_copy(b, &y)) != MP_OKAY) goto LBL_ERR; + + /* 2. [modified] if x,y are both even then return an error! */ + if (mp_iseven(&x) && mp_iseven(&y)) { + err = MP_VAL; + goto LBL_ERR; + } + + /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ + if ((err = mp_copy(&x, &u)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_copy(&y, &v)) != MP_OKAY) goto LBL_ERR; + mp_set(&A, 1uL); + mp_set(&D, 1uL); + + do { + /* 4. while u is even do */ + while (mp_iseven(&u)) { + /* 4.1 u = u/2 */ + if ((err = mp_div_2(&u, &u)) != MP_OKAY) goto LBL_ERR; + + /* 4.2 if A or B is odd then */ + if (mp_isodd(&A) || mp_isodd(&B)) { + /* A = (A+y)/2, B = (B-x)/2 */ + if ((err = mp_add(&A, &y, &A)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_sub(&B, &x, &B)) != MP_OKAY) goto LBL_ERR; + } + /* A = A/2, B = B/2 */ + if ((err = mp_div_2(&A, &A)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_div_2(&B, &B)) != MP_OKAY) goto LBL_ERR; + } + + /* 5. while v is even do */ + while (mp_iseven(&v)) { + /* 5.1 v = v/2 */ + if ((err = mp_div_2(&v, &v)) != MP_OKAY) goto LBL_ERR; + + /* 5.2 if C or D is odd then */ + if (mp_isodd(&C) || mp_isodd(&D)) { + /* C = (C+y)/2, D = (D-x)/2 */ + if ((err = mp_add(&C, &y, &C)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_sub(&D, &x, &D)) != MP_OKAY) goto LBL_ERR; + } + /* C = C/2, D = D/2 */ + if ((err = mp_div_2(&C, &C)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_div_2(&D, &D)) != MP_OKAY) goto LBL_ERR; + } + + /* 6. if u >= v then */ + if (mp_cmp(&u, &v) != MP_LT) { + /* u = u - v, A = A - C, B = B - D */ + if ((err = mp_sub(&u, &v, &u)) != MP_OKAY) goto LBL_ERR; + + if ((err = mp_sub(&A, &C, &A)) != MP_OKAY) goto LBL_ERR; + + if ((err = mp_sub(&B, &D, &B)) != MP_OKAY) goto LBL_ERR; + } else { + /* v - v - u, C = C - A, D = D - B */ + if ((err = mp_sub(&v, &u, &v)) != MP_OKAY) goto LBL_ERR; + + if ((err = mp_sub(&C, &A, &C)) != MP_OKAY) goto LBL_ERR; + + if ((err = mp_sub(&D, &B, &D)) != MP_OKAY) goto LBL_ERR; + } + + /* if not zero goto step 4 */ + } while (!mp_iszero(&u)); + + /* now a = C, b = D, gcd == g*v */ + + /* if v != 1 then there is no inverse */ + if (mp_cmp_d(&v, 1uL) != MP_EQ) { + err = MP_VAL; + goto LBL_ERR; + } + + /* if its too low */ + while (mp_cmp_d(&C, 0uL) == MP_LT) { + if ((err = mp_add(&C, b, &C)) != MP_OKAY) goto LBL_ERR; + } + + /* too big */ + while (mp_cmp_mag(&C, b) != MP_LT) { + if ((err = mp_sub(&C, b, &C)) != MP_OKAY) goto LBL_ERR; + } + + /* C is now the inverse */ + mp_exch(&C, c); + +LBL_ERR: + mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL); + return err; +} +#endif diff --git a/s_mp_invmod_fast.c b/s_mp_invmod_odd.c similarity index 54% rename from s_mp_invmod_fast.c rename to s_mp_invmod_odd.c index ed1fc4a..8b55d5b 100644 --- a/s_mp_invmod_fast.c +++ b/s_mp_invmod_odd.c @@ -1,5 +1,5 @@ #include "tommath_private.h" -#ifdef S_MP_INVMOD_FAST_C +#ifdef S_MP_INVMOD_ODD_C /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ @@ -9,7 +9,7 @@ * Based on slow invmod except this is optimized for the case where b is * odd as per HAC Note 14.64 on pp. 610 */ -mp_err s_mp_invmod_fast(const mp_int *a, const mp_int *b, mp_int *c) +mp_err s_mp_invmod_odd(const mp_int *a, const mp_int *b, mp_int *c) { mp_int x, y, u, v, B, D; mp_sign neg; @@ -42,51 +42,49 @@ mp_err s_mp_invmod_fast(const mp_int *a, const mp_int *b, mp_int *c) if ((err = mp_copy(&y, &v)) != MP_OKAY) goto LBL_ERR; mp_set(&D, 1uL); -top: - /* 4. while u is even do */ - while (mp_iseven(&u)) { - /* 4.1 u = u/2 */ - if ((err = mp_div_2(&u, &u)) != MP_OKAY) goto LBL_ERR; + do { + /* 4. while u is even do */ + while (mp_iseven(&u)) { + /* 4.1 u = u/2 */ + if ((err = mp_div_2(&u, &u)) != MP_OKAY) goto LBL_ERR; - /* 4.2 if B is odd then */ - if (mp_isodd(&B)) { - if ((err = mp_sub(&B, &x, &B)) != MP_OKAY) goto LBL_ERR; + /* 4.2 if B is odd then */ + if (mp_isodd(&B)) { + if ((err = mp_sub(&B, &x, &B)) != MP_OKAY) goto LBL_ERR; + } + /* B = B/2 */ + if ((err = mp_div_2(&B, &B)) != MP_OKAY) goto LBL_ERR; } - /* B = B/2 */ - if ((err = mp_div_2(&B, &B)) != MP_OKAY) goto LBL_ERR; - } - /* 5. while v is even do */ - while (mp_iseven(&v)) { - /* 5.1 v = v/2 */ - if ((err = mp_div_2(&v, &v)) != MP_OKAY) goto LBL_ERR; + /* 5. while v is even do */ + while (mp_iseven(&v)) { + /* 5.1 v = v/2 */ + if ((err = mp_div_2(&v, &v)) != MP_OKAY) goto LBL_ERR; - /* 5.2 if D is odd then */ - if (mp_isodd(&D)) { - /* D = (D-x)/2 */ - if ((err = mp_sub(&D, &x, &D)) != MP_OKAY) goto LBL_ERR; + /* 5.2 if D is odd then */ + if (mp_isodd(&D)) { + /* D = (D-x)/2 */ + if ((err = mp_sub(&D, &x, &D)) != MP_OKAY) goto LBL_ERR; + } + /* D = D/2 */ + if ((err = mp_div_2(&D, &D)) != MP_OKAY) goto LBL_ERR; } - /* D = D/2 */ - if ((err = mp_div_2(&D, &D)) != MP_OKAY) goto LBL_ERR; - } - /* 6. if u >= v then */ - if (mp_cmp(&u, &v) != MP_LT) { - /* u = u - v, B = B - D */ - if ((err = mp_sub(&u, &v, &u)) != MP_OKAY) goto LBL_ERR; + /* 6. if u >= v then */ + if (mp_cmp(&u, &v) != MP_LT) { + /* u = u - v, B = B - D */ + if ((err = mp_sub(&u, &v, &u)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_sub(&B, &D, &B)) != MP_OKAY) goto LBL_ERR; - } else { - /* v - v - u, D = D - B */ - if ((err = mp_sub(&v, &u, &v)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_sub(&B, &D, &B)) != MP_OKAY) goto LBL_ERR; + } else { + /* v - v - u, D = D - B */ + if ((err = mp_sub(&v, &u, &v)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_sub(&D, &B, &D)) != MP_OKAY) goto LBL_ERR; - } + if ((err = mp_sub(&D, &B, &D)) != MP_OKAY) goto LBL_ERR; + } - /* if not zero goto step 4 */ - if (!mp_iszero(&u)) { - goto top; - } + /* if not zero goto step 4 */ + } while (!mp_iszero(&u)); /* now a = C, b = D, gcd == g*v */ diff --git a/s_mp_invmod_slow.c b/s_mp_invmod_slow.c deleted file mode 100644 index 28cd6cd..0000000 --- a/s_mp_invmod_slow.c +++ /dev/null @@ -1,119 +0,0 @@ -#include "tommath_private.h" -#ifdef S_MP_INVMOD_SLOW_C -/* LibTomMath, multiple-precision integer library -- Tom St Denis */ -/* SPDX-License-Identifier: Unlicense */ - -/* hac 14.61, pp608 */ -mp_err s_mp_invmod_slow(const mp_int *a, const mp_int *b, mp_int *c) -{ - mp_int x, y, u, v, A, B, C, D; - mp_err err; - - /* b cannot be negative */ - if ((b->sign == MP_NEG) || mp_iszero(b)) { - return MP_VAL; - } - - /* init temps */ - if ((err = mp_init_multi(&x, &y, &u, &v, - &A, &B, &C, &D, NULL)) != MP_OKAY) { - return err; - } - - /* x = a, y = b */ - if ((err = mp_mod(a, b, &x)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_copy(b, &y)) != MP_OKAY) goto LBL_ERR; - - /* 2. [modified] if x,y are both even then return an error! */ - if (mp_iseven(&x) && mp_iseven(&y)) { - err = MP_VAL; - goto LBL_ERR; - } - - /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ - if ((err = mp_copy(&x, &u)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_copy(&y, &v)) != MP_OKAY) goto LBL_ERR; - mp_set(&A, 1uL); - mp_set(&D, 1uL); - -top: - /* 4. while u is even do */ - while (mp_iseven(&u)) { - /* 4.1 u = u/2 */ - if ((err = mp_div_2(&u, &u)) != MP_OKAY) goto LBL_ERR; - - /* 4.2 if A or B is odd then */ - if (mp_isodd(&A) || mp_isodd(&B)) { - /* A = (A+y)/2, B = (B-x)/2 */ - if ((err = mp_add(&A, &y, &A)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_sub(&B, &x, &B)) != MP_OKAY) goto LBL_ERR; - } - /* A = A/2, B = B/2 */ - if ((err = mp_div_2(&A, &A)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_div_2(&B, &B)) != MP_OKAY) goto LBL_ERR; - } - - /* 5. while v is even do */ - while (mp_iseven(&v)) { - /* 5.1 v = v/2 */ - if ((err = mp_div_2(&v, &v)) != MP_OKAY) goto LBL_ERR; - - /* 5.2 if C or D is odd then */ - if (mp_isodd(&C) || mp_isodd(&D)) { - /* C = (C+y)/2, D = (D-x)/2 */ - if ((err = mp_add(&C, &y, &C)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_sub(&D, &x, &D)) != MP_OKAY) goto LBL_ERR; - } - /* C = C/2, D = D/2 */ - if ((err = mp_div_2(&C, &C)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_div_2(&D, &D)) != MP_OKAY) goto LBL_ERR; - } - - /* 6. if u >= v then */ - if (mp_cmp(&u, &v) != MP_LT) { - /* u = u - v, A = A - C, B = B - D */ - if ((err = mp_sub(&u, &v, &u)) != MP_OKAY) goto LBL_ERR; - - if ((err = mp_sub(&A, &C, &A)) != MP_OKAY) goto LBL_ERR; - - if ((err = mp_sub(&B, &D, &B)) != MP_OKAY) goto LBL_ERR; - } else { - /* v - v - u, C = C - A, D = D - B */ - if ((err = mp_sub(&v, &u, &v)) != MP_OKAY) goto LBL_ERR; - - if ((err = mp_sub(&C, &A, &C)) != MP_OKAY) goto LBL_ERR; - - if ((err = mp_sub(&D, &B, &D)) != MP_OKAY) goto LBL_ERR; - } - - /* if not zero goto step 4 */ - if (!mp_iszero(&u)) { - goto top; - } - - /* now a = C, b = D, gcd == g*v */ - - /* if v != 1 then there is no inverse */ - if (mp_cmp_d(&v, 1uL) != MP_EQ) { - err = MP_VAL; - goto LBL_ERR; - } - - /* if its too low */ - while (mp_cmp_d(&C, 0uL) == MP_LT) { - if ((err = mp_add(&C, b, &C)) != MP_OKAY) goto LBL_ERR; - } - - /* too big */ - while (mp_cmp_mag(&C, b) != MP_LT) { - if ((err = mp_sub(&C, b, &C)) != MP_OKAY) goto LBL_ERR; - } - - /* C is now the inverse */ - mp_exch(&C, c); - err = MP_OKAY; -LBL_ERR: - mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL); - return err; -} -#endif diff --git a/s_mp_montgomery_reduce_fast.c b/s_mp_montgomery_reduce_comba.c similarity index 51% rename from s_mp_montgomery_reduce_fast.c rename to s_mp_montgomery_reduce_comba.c index 083e7a4..6f249c4 100644 --- a/s_mp_montgomery_reduce_fast.c +++ b/s_mp_montgomery_reduce_comba.c @@ -1,5 +1,5 @@ #include "tommath_private.h" -#ifdef S_MP_MONTGOMERY_REDUCE_FAST_C +#ifdef S_MP_MONTGOMERY_REDUCE_COMBA_C /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ @@ -11,9 +11,9 @@ * * Based on Algorithm 14.32 on pp.601 of HAC. */ -mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho) +mp_err s_mp_montgomery_reduce_comba(mp_int *x, const mp_int *n, mp_digit rho) { - int ix, olduse; + int ix, oldused; mp_err err; mp_word W[MP_WARRAY]; @@ -22,50 +22,40 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho) } /* get old used count */ - olduse = x->used; + oldused = x->used; /* grow a as required */ - if (x->alloc < (n->used + 1)) { - if ((err = mp_grow(x, n->used + 1)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(x, n->used + 1)) != MP_OKAY) { + return err; } /* first we have to get the digits of the input into * an array of double precision words W[...] */ - { - mp_word *_W; - mp_digit *tmpx; - /* alias for the W[] array */ - _W = W; + /* copy the digits of a into W[0..a->used-1] */ + for (ix = 0; ix < x->used; ix++) { + W[ix] = x->dp[ix]; + } - /* alias for the digits of x*/ - tmpx = x->dp; - - /* copy the digits of a into W[0..a->used-1] */ - for (ix = 0; ix < x->used; ix++) { - *_W++ = *tmpx++; - } - - /* zero the high words of W[a->used..m->used*2] */ - if (ix < ((n->used * 2) + 1)) { - MP_ZERO_BUFFER(_W, sizeof(mp_word) * (size_t)(((n->used * 2) + 1) - ix)); - } + /* zero the high words of W[a->used..m->used*2] */ + if (ix < ((n->used * 2) + 1)) { + s_mp_zero_buf(W + x->used, sizeof(mp_word) * (size_t)(((n->used * 2) + 1) - ix)); } /* now we proceed to zero successive digits * from the least significant upwards */ for (ix = 0; ix < n->used; ix++) { + int iy; + mp_digit mu; + /* mu = ai * m' mod b * * We avoid a double precision multiplication (which isn't required) * by casting the value down to a mp_digit. Note this requires * that W[ix-1] have the carry cleared (see after the inner loop) */ - mp_digit mu; mu = ((W[ix] & MP_MASK) * rho) & MP_MASK; /* a = a + mu * m * b**i @@ -82,21 +72,8 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho) * carry fixups are done in order so after these loops the * first m->used words of W[] have the carries fixed */ - { - int iy; - mp_digit *tmpn; - mp_word *_W; - - /* alias for the digits of the modulus */ - tmpn = n->dp; - - /* Alias for the columns set by an offset of ix */ - _W = W + ix; - - /* inner loop */ - for (iy = 0; iy < n->used; iy++) { - *_W++ += (mp_word)mu * (mp_word)*tmpn++; - } + for (iy = 0; iy < n->used; iy++) { + W[ix + iy] += (mp_word)mu * (mp_word)n->dp[iy]; } /* now fix carry for next digit, W[ix+1] */ @@ -107,47 +84,30 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho) * shift the words downward [all those least * significant digits we zeroed]. */ - { - mp_digit *tmpx; - mp_word *_W, *_W1; - /* nox fix rest of carries */ - - /* alias for current word */ - _W1 = W + ix; - - /* alias for next word, where the carry goes */ - _W = W + ++ix; - - for (; ix < ((n->used * 2) + 1); ix++) { - *_W++ += *_W1++ >> (mp_word)MP_DIGIT_BIT; - } - - /* copy out, A = A/b**n - * - * The result is A/b**n but instead of converting from an - * array of mp_word to mp_digit than calling mp_rshd - * we just copy them in the right order - */ - - /* alias for destination word */ - tmpx = x->dp; - - /* alias for shifted double precision result */ - _W = W + n->used; - - for (ix = 0; ix < (n->used + 1); ix++) { - *tmpx++ = *_W++ & (mp_word)MP_MASK; - } - - /* zero oldused digits, if the input a was larger than - * m->used+1 we'll have to clear the digits - */ - MP_ZERO_DIGITS(tmpx, olduse - ix); + for (; ix < (n->used * 2); ix++) { + W[ix + 1] += W[ix] >> (mp_word)MP_DIGIT_BIT; } - /* set the max used and clamp */ + /* copy out, A = A/b**n + * + * The result is A/b**n but instead of converting from an + * array of mp_word to mp_digit than calling mp_rshd + * we just copy them in the right order + */ + + for (ix = 0; ix < (n->used + 1); ix++) { + x->dp[ix] = W[n->used + ix] & (mp_word)MP_MASK; + } + + /* set the max used */ x->used = n->used + 1; + + /* zero oldused digits, if the input a was larger than + * m->used+1 we'll have to clear the digits + */ + s_mp_zero_digs(x->dp + x->used, oldused - x->used); + mp_clamp(x); /* if A >= m then A = A - m */ diff --git a/s_mp_mul_digs.c b/s_mp_mul.c similarity index 63% rename from s_mp_mul_digs.c rename to s_mp_mul.c index ea0985b..cd17b99 100644 --- a/s_mp_mul_digs.c +++ b/s_mp_mul.c @@ -1,5 +1,5 @@ #include "tommath_private.h" -#ifdef S_MP_MUL_DIGS_C +#ifdef S_MP_MUL_C /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ @@ -7,19 +7,16 @@ * HAC pp. 595, Algorithm 14.12 Modified so you can control how * many digits of output are created. */ -mp_err s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs) +mp_err s_mp_mul(const mp_int *a, const mp_int *b, mp_int *c, int digs) { mp_int t; mp_err err; - int pa, pb, ix, iy; - mp_digit u; - mp_word r; - mp_digit tmpx, *tmpt, *tmpy; + int pa, ix; /* can we use the fast multiplier? */ if ((digs < MP_WARRAY) && (MP_MIN(a->used, b->used) < MP_MAXFAST)) { - return s_mp_mul_digs_fast(a, b, c, digs); + return s_mp_mul_comba(a, b, c, digs); } if ((err = mp_init_size(&t, digs)) != MP_OKAY) { @@ -30,38 +27,28 @@ mp_err s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs) /* compute the digits of the product directly */ pa = a->used; for (ix = 0; ix < pa; ix++) { - /* set the carry to zero */ - u = 0; + int iy, pb; + mp_digit u = 0; /* limit ourselves to making digs digits of output */ pb = MP_MIN(b->used, digs - ix); - /* setup some aliases */ - /* copy of the digit from a used within the nested loop */ - tmpx = a->dp[ix]; - - /* an alias for the destination shifted ix places */ - tmpt = t.dp + ix; - - /* an alias for the digits of b */ - tmpy = b->dp; - /* compute the columns of the output and propagate the carry */ for (iy = 0; iy < pb; iy++) { /* compute the column as a mp_word */ - r = (mp_word)*tmpt + - ((mp_word)tmpx * (mp_word)*tmpy++) + - (mp_word)u; + mp_word r = (mp_word)t.dp[ix + iy] + + ((mp_word)a->dp[ix] * (mp_word)b->dp[iy]) + + (mp_word)u; /* the new column is the lower part of the result */ - *tmpt++ = (mp_digit)(r & (mp_word)MP_MASK); + t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK); /* get the carry word from the result */ u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); } /* set carry if it is placed below digs */ if ((ix + iy) < digs) { - *tmpt = u; + t.dp[ix + pb] = u; } } diff --git a/s_mp_balance_mul.c b/s_mp_mul_balance.c similarity index 62% rename from s_mp_balance_mul.c rename to s_mp_mul_balance.c index 4108830..f36f0d3 100644 --- a/s_mp_balance_mul.c +++ b/s_mp_mul_balance.c @@ -1,20 +1,16 @@ #include "tommath_private.h" -#ifdef S_MP_BALANCE_MUL_C +#ifdef S_MP_MUL_BALANCE_C /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ /* single-digit multiplication with the smaller number as the single-digit */ -mp_err s_mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c) +mp_err s_mp_mul_balance(const mp_int *a, const mp_int *b, mp_int *c) { - int count, len_a, len_b, nblocks, i, j, bsize; - mp_int a0, tmp, A, B, r; + mp_int a0, tmp, r; mp_err err; - - len_a = a->used; - len_b = b->used; - - nblocks = MP_MAX(a->used, b->used) / MP_MIN(a->used, b->used); - bsize = MP_MIN(a->used, b->used) ; + int i, j, + nblocks = MP_MAX(a->used, b->used) / MP_MIN(a->used, b->used), + bsize = MP_MIN(a->used, b->used); if ((err = mp_init_size(&a0, bsize + 2)) != MP_OKAY) { return err; @@ -25,24 +21,19 @@ mp_err s_mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c) } /* Make sure that A is the larger one*/ - if (len_a < len_b) { - B = *a; - A = *b; - } else { - A = *a; - B = *b; + if (a->used < b->used) { + MP_EXCH(const mp_int *, a, b); } for (i = 0, j=0; i < nblocks; i++) { /* Cut a slice off of a */ - a0.used = 0; - for (count = 0; count < bsize; count++) { - a0.dp[count] = A.dp[ j++ ]; - a0.used++; - } + a0.used = bsize; + s_mp_copy_digs(a0.dp, a->dp + j, a0.used); + j += a0.used; mp_clamp(&a0); + /* Multiply with b */ - if ((err = mp_mul(&a0, &B, &tmp)) != MP_OKAY) { + if ((err = mp_mul(&a0, b, &tmp)) != MP_OKAY) { goto LBL_ERR; } /* Shift tmp to the correct position */ @@ -55,14 +46,13 @@ mp_err s_mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c) } } /* The left-overs; there are always left-overs */ - if (j < A.used) { - a0.used = 0; - for (count = 0; j < A.used; count++) { - a0.dp[count] = A.dp[ j++ ]; - a0.used++; - } + if (j < a->used) { + a0.used = a->used - j; + s_mp_copy_digs(a0.dp, a->dp + j, a0.used); + j += a0.used; mp_clamp(&a0); - if ((err = mp_mul(&a0, &B, &tmp)) != MP_OKAY) { + + if ((err = mp_mul(&a0, b, &tmp)) != MP_OKAY) { goto LBL_ERR; } if ((err = mp_lshd(&tmp, bsize * i)) != MP_OKAY) { diff --git a/s_mp_mul_digs_fast.c b/s_mp_mul_comba.c similarity index 67% rename from s_mp_mul_digs_fast.c rename to s_mp_mul_comba.c index 8988838..07dd791 100644 --- a/s_mp_mul_digs_fast.c +++ b/s_mp_mul_comba.c @@ -1,5 +1,5 @@ #include "tommath_private.h" -#ifdef S_MP_MUL_DIGS_FAST_C +#ifdef S_MP_MUL_COMBA_C /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ @@ -19,18 +19,16 @@ * Based on Algorithm 14.12 on pp.595 of HAC. * */ -mp_err s_mp_mul_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) +mp_err s_mp_mul_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) { - int olduse, pa, ix, iz; + int oldused, pa, ix; mp_err err; mp_digit W[MP_WARRAY]; mp_word _W; /* grow the destination as required */ - if (c->alloc < digs) { - if ((err = mp_grow(c, digs)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(c, digs)) != MP_OKAY) { + return err; } /* number of output digits to produce */ @@ -39,18 +37,12 @@ mp_err s_mp_mul_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) /* clear the carry */ _W = 0; for (ix = 0; ix < pa; ix++) { - int tx, ty; - int iy; - mp_digit *tmpx, *tmpy; + int tx, ty, iy, iz; /* get offsets into the two bignums */ ty = MP_MIN(b->used-1, ix); tx = ix - ty; - /* setup temp aliases */ - tmpx = a->dp + tx; - tmpy = b->dp + ty; - /* this is the number of times the loop will iterrate, essentially while (tx++ < a->used && ty-- >= 0) { ... } */ @@ -58,8 +50,7 @@ mp_err s_mp_mul_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) /* execute loop */ for (iz = 0; iz < iy; ++iz) { - _W += (mp_word)*tmpx++ * (mp_word)*tmpy--; - + _W += (mp_word)a->dp[tx + iz] * (mp_word)b->dp[ty - iz]; } /* store term */ @@ -70,20 +61,17 @@ mp_err s_mp_mul_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) } /* setup dest */ - olduse = c->used; + oldused = c->used; c->used = pa; - { - mp_digit *tmpc; - tmpc = c->dp; - for (ix = 0; ix < pa; ix++) { - /* now extract the previous digit [below the carry] */ - *tmpc++ = W[ix]; - } - - /* clear unused digits [that existed in the old copy of c] */ - MP_ZERO_DIGITS(tmpc, olduse - ix); + for (ix = 0; ix < pa; ix++) { + /* now extract the previous digit [below the carry] */ + c->dp[ix] = W[ix]; } + + /* clear unused digits [that existed in the old copy of c] */ + s_mp_zero_digs(c->dp + c->used, oldused - c->used); + mp_clamp(c); return MP_OKAY; } diff --git a/s_mp_mul_high_digs.c b/s_mp_mul_high.c similarity index 54% rename from s_mp_mul_high_digs.c rename to s_mp_mul_high.c index 87cfbe5..d1d1806 100644 --- a/s_mp_mul_high_digs.c +++ b/s_mp_mul_high.c @@ -1,25 +1,22 @@ #include "tommath_private.h" -#ifdef S_MP_MUL_HIGH_DIGS_C +#ifdef S_MP_MUL_HIGH_C /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ /* multiplies |a| * |b| and does not compute the lower digs digits * [meant to get the higher part of the product] */ -mp_err s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs) +mp_err s_mp_mul_high(const mp_int *a, const mp_int *b, mp_int *c, int digs) { mp_int t; - int pa, pb, ix, iy; + int pa, pb, ix; mp_err err; - mp_digit u; - mp_word r; - mp_digit tmpx, *tmpt, *tmpy; /* can we use the fast multiplier? */ - if (MP_HAS(S_MP_MUL_HIGH_DIGS_FAST) + if (MP_HAS(S_MP_MUL_HIGH_COMBA) && ((a->used + b->used + 1) < MP_WARRAY) && (MP_MIN(a->used, b->used) < MP_MAXFAST)) { - return s_mp_mul_high_digs_fast(a, b, c, digs); + return s_mp_mul_high_comba(a, b, c, digs); } if ((err = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) { @@ -30,31 +27,22 @@ mp_err s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs) pa = a->used; pb = b->used; for (ix = 0; ix < pa; ix++) { - /* clear the carry */ - u = 0; - - /* left hand side of A[ix] * B[iy] */ - tmpx = a->dp[ix]; - - /* alias to the address of where the digits will be stored */ - tmpt = &(t.dp[digs]); - - /* alias for where to read the right hand side from */ - tmpy = b->dp + (digs - ix); + int iy; + mp_digit u = 0; for (iy = digs - ix; iy < pb; iy++) { /* calculate the double precision result */ - r = (mp_word)*tmpt + - ((mp_word)tmpx * (mp_word)*tmpy++) + - (mp_word)u; + mp_word r = (mp_word)t.dp[ix + iy] + + ((mp_word)a->dp[ix] * (mp_word)b->dp[iy]) + + (mp_word)u; /* get the lower part */ - *tmpt++ = (mp_digit)(r & (mp_word)MP_MASK); + t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK); /* carry the carry */ u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); } - *tmpt = u; + t.dp[ix + pb] = u; } mp_clamp(&t); mp_exch(&t, c); diff --git a/s_mp_mul_high_digs_fast.c b/s_mp_mul_high_comba.c similarity index 57% rename from s_mp_mul_high_digs_fast.c rename to s_mp_mul_high_comba.c index 1559ebc..317346d 100644 --- a/s_mp_mul_high_digs_fast.c +++ b/s_mp_mul_high_comba.c @@ -1,10 +1,10 @@ #include "tommath_private.h" -#ifdef S_MP_MUL_HIGH_DIGS_FAST_C +#ifdef S_MP_MUL_HIGH_COMBA_C /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ -/* this is a modified version of s_mp_mul_digs_fast that only produces - * output digits *above* digs. See the comments for s_mp_mul_digs_fast +/* this is a modified version of s_mp_mul_comba that only produces + * output digits *above* digs. See the comments for s_mp_mul_comba * to see how it works. * * This is used in the Barrett reduction since for one of the multiplications @@ -12,36 +12,29 @@ * * Based on Algorithm 14.12 on pp.595 of HAC. */ -mp_err s_mp_mul_high_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) +mp_err s_mp_mul_high_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) { - int olduse, pa, ix, iz; + int oldused, pa, ix; mp_err err; mp_digit W[MP_WARRAY]; mp_word _W; /* grow the destination as required */ pa = a->used + b->used; - if (c->alloc < pa) { - if ((err = mp_grow(c, pa)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(c, pa)) != MP_OKAY) { + return err; } /* number of output digits to produce */ pa = a->used + b->used; _W = 0; for (ix = digs; ix < pa; ix++) { - int tx, ty, iy; - mp_digit *tmpx, *tmpy; + int tx, ty, iy, iz; /* get offsets into the two bignums */ ty = MP_MIN(b->used-1, ix); tx = ix - ty; - /* setup temp aliases */ - tmpx = a->dp + tx; - tmpy = b->dp + ty; - /* this is the number of times the loop will iterrate, essentially its while (tx++ < a->used && ty-- >= 0) { ... } */ @@ -49,7 +42,7 @@ mp_err s_mp_mul_high_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int /* execute loop */ for (iz = 0; iz < iy; iz++) { - _W += (mp_word)*tmpx++ * (mp_word)*tmpy--; + _W += (mp_word)a->dp[tx + iz] * (mp_word)b->dp[ty - iz]; } /* store term */ @@ -60,21 +53,17 @@ mp_err s_mp_mul_high_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int } /* setup dest */ - olduse = c->used; + oldused = c->used; c->used = pa; - { - mp_digit *tmpc; - - tmpc = c->dp + digs; - for (ix = digs; ix < pa; ix++) { - /* now extract the previous digit [below the carry] */ - *tmpc++ = W[ix]; - } - - /* clear unused digits [that existed in the old copy of c] */ - MP_ZERO_DIGITS(tmpc, olduse - ix); + for (ix = digs; ix < pa; ix++) { + /* now extract the previous digit [below the carry] */ + c->dp[ix] = W[ix]; } + + /* clear unused digits [that existed in the old copy of c] */ + s_mp_zero_digs(c->dp + c->used, oldused - c->used); + mp_clamp(c); return MP_OKAY; } diff --git a/s_mp_karatsuba_mul.c b/s_mp_mul_karatsuba.c similarity index 64% rename from s_mp_karatsuba_mul.c rename to s_mp_mul_karatsuba.c index df3daa7..bf9271f 100644 --- a/s_mp_karatsuba_mul.c +++ b/s_mp_mul_karatsuba.c @@ -1,5 +1,5 @@ #include "tommath_private.h" -#ifdef S_MP_KARATSUBA_MUL_C +#ifdef S_MP_MUL_KARATSUBA_C /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ @@ -32,11 +32,11 @@ * Generally though the overhead of this method doesn't pay off * until a certain size (N ~ 80) is reached. */ -mp_err s_mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c) +mp_err s_mp_mul_karatsuba(const mp_int *a, const mp_int *b, mp_int *c) { mp_int x0, x1, y0, y1, t1, x0y0, x1y1; - int B; - mp_err err = MP_MEM; /* default the return code to an error */ + int B; + mp_err err; /* min # of digits */ B = MP_MIN(a->used, b->used); @@ -45,27 +45,27 @@ mp_err s_mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c) B = B >> 1; /* init copy all the temps */ - if (mp_init_size(&x0, B) != MP_OKAY) { + if ((err = mp_init_size(&x0, B)) != MP_OKAY) { goto LBL_ERR; } - if (mp_init_size(&x1, a->used - B) != MP_OKAY) { + if ((err = mp_init_size(&x1, a->used - B)) != MP_OKAY) { goto X0; } - if (mp_init_size(&y0, B) != MP_OKAY) { + if ((err = mp_init_size(&y0, B)) != MP_OKAY) { goto X1; } - if (mp_init_size(&y1, b->used - B) != MP_OKAY) { + if ((err = mp_init_size(&y1, b->used - B)) != MP_OKAY) { goto Y0; } /* init temps */ - if (mp_init_size(&t1, B * 2) != MP_OKAY) { + if ((err = mp_init_size(&t1, B * 2)) != MP_OKAY) { goto Y1; } - if (mp_init_size(&x0y0, B * 2) != MP_OKAY) { + if ((err = mp_init_size(&x0y0, B * 2)) != MP_OKAY) { goto T1; } - if (mp_init_size(&x1y1, B * 2) != MP_OKAY) { + if ((err = mp_init_size(&x1y1, B * 2)) != MP_OKAY) { goto X0Y0; } @@ -74,33 +74,13 @@ mp_err s_mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c) x1.used = a->used - B; y1.used = b->used - B; - { - int x; - mp_digit *tmpa, *tmpb, *tmpx, *tmpy; - - /* we copy the digits directly instead of using higher level functions - * since we also need to shift the digits - */ - tmpa = a->dp; - tmpb = b->dp; - - tmpx = x0.dp; - tmpy = y0.dp; - for (x = 0; x < B; x++) { - *tmpx++ = *tmpa++; - *tmpy++ = *tmpb++; - } - - tmpx = x1.dp; - for (x = B; x < a->used; x++) { - *tmpx++ = *tmpa++; - } - - tmpy = y1.dp; - for (x = B; x < b->used; x++) { - *tmpy++ = *tmpb++; - } - } + /* we copy the digits directly instead of using higher level functions + * since we also need to shift the digits + */ + s_mp_copy_digs(x0.dp, a->dp, x0.used); + s_mp_copy_digs(y0.dp, b->dp, y0.used); + s_mp_copy_digs(x1.dp, a->dp + B, x1.used); + s_mp_copy_digs(y1.dp, b->dp + B, y1.used); /* only need to clamp the lower words since by definition the * upper words x1/y1 must have a known number of digits @@ -110,50 +90,47 @@ mp_err s_mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c) /* now calc the products x0y0 and x1y1 */ /* after this x0 is no longer required, free temp [x0==t2]! */ - if (mp_mul(&x0, &y0, &x0y0) != MP_OKAY) { + if ((err = mp_mul(&x0, &y0, &x0y0)) != MP_OKAY) { goto X1Y1; /* x0y0 = x0*y0 */ } - if (mp_mul(&x1, &y1, &x1y1) != MP_OKAY) { + if ((err = mp_mul(&x1, &y1, &x1y1)) != MP_OKAY) { goto X1Y1; /* x1y1 = x1*y1 */ } /* now calc x1+x0 and y1+y0 */ - if (s_mp_add(&x1, &x0, &t1) != MP_OKAY) { + if ((err = s_mp_add(&x1, &x0, &t1)) != MP_OKAY) { goto X1Y1; /* t1 = x1 - x0 */ } - if (s_mp_add(&y1, &y0, &x0) != MP_OKAY) { + if ((err = s_mp_add(&y1, &y0, &x0)) != MP_OKAY) { goto X1Y1; /* t2 = y1 - y0 */ } - if (mp_mul(&t1, &x0, &t1) != MP_OKAY) { + if ((err = mp_mul(&t1, &x0, &t1)) != MP_OKAY) { goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ } /* add x0y0 */ - if (mp_add(&x0y0, &x1y1, &x0) != MP_OKAY) { + if ((err = mp_add(&x0y0, &x1y1, &x0)) != MP_OKAY) { goto X1Y1; /* t2 = x0y0 + x1y1 */ } - if (s_mp_sub(&t1, &x0, &t1) != MP_OKAY) { + if ((err = s_mp_sub(&t1, &x0, &t1)) != MP_OKAY) { goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ } /* shift by B */ - if (mp_lshd(&t1, B) != MP_OKAY) { + if ((err = mp_lshd(&t1, B)) != MP_OKAY) { goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<dp[count]; - a0.used++; - } - mp_clamp(&a0); if ((err = mp_init_size(&a1, B)) != MP_OKAY) goto LBL_ERRa1; - for (; count < (2 * B); count++) { - a1.dp[count - B] = a->dp[count]; - a1.used++; - } + if ((err = mp_init_size(&a2, a->used - 2 * B)) != MP_OKAY) goto LBL_ERRa2; + + a0.used = a1.used = B; + a2.used = a->used - 2 * B; + s_mp_copy_digs(a0.dp, a->dp, a0.used); + s_mp_copy_digs(a1.dp, a->dp + B, a1.used); + s_mp_copy_digs(a2.dp, a->dp + 2 * B, a2.used); + mp_clamp(&a0); mp_clamp(&a1); - if ((err = mp_init_size(&a2, B + (a->used - (3 * B)))) != MP_OKAY) goto LBL_ERRa2; - for (; count < a->used; count++) { - a2.dp[count - (2 * B)] = a->dp[count]; - a2.used++; - } mp_clamp(&a2); /** b = b2 * x^2 + b1 * x + b0; */ if ((err = mp_init_size(&b0, B)) != MP_OKAY) goto LBL_ERRb0; - for (count = 0; count < B; count++) { - b0.dp[count] = b->dp[count]; - b0.used++; - } - mp_clamp(&b0); if ((err = mp_init_size(&b1, B)) != MP_OKAY) goto LBL_ERRb1; - for (; count < (2 * B); count++) { - b1.dp[count - B] = b->dp[count]; - b1.used++; - } + if ((err = mp_init_size(&b2, b->used - 2 * B)) != MP_OKAY) goto LBL_ERRb2; + + b0.used = b1.used = B; + b2.used = b->used - 2 * B; + s_mp_copy_digs(b0.dp, b->dp, b0.used); + s_mp_copy_digs(b1.dp, b->dp + B, b1.used); + s_mp_copy_digs(b2.dp, b->dp + 2 * B, b2.used); + mp_clamp(&b0); mp_clamp(&b1); - if ((err = mp_init_size(&b2, B + (b->used - (3 * B)))) != MP_OKAY) goto LBL_ERRb2; - for (; count < b->used; count++) { - b2.dp[count - (2 * B)] = b->dp[count]; - b2.used++; - } mp_clamp(&b2); /** \\ S1 = (a2+a1+a0) * (b2+b1+b0); */ diff --git a/s_mp_prime_is_divisible.c b/s_mp_prime_is_divisible.c index 0cca5a6..63b2405 100644 --- a/s_mp_prime_is_divisible.c +++ b/s_mp_prime_is_divisible.c @@ -10,16 +10,12 @@ */ mp_err s_mp_prime_is_divisible(const mp_int *a, bool *result) { - int ix; - mp_err err; - mp_digit res; - - /* default to not */ - *result = false; - - for (ix = 0; ix < MP_PRIME_TAB_SIZE; ix++) { - /* what is a mod LBL_prime_tab[ix] */ - if ((err = mp_mod_d(a, s_mp_prime_tab[ix], &res)) != MP_OKAY) { + int i; + for (i = 0; i < MP_PRIME_TAB_SIZE; i++) { + /* what is a mod LBL_prime_tab[i] */ + mp_err err; + mp_digit res; + if ((err = mp_mod_d(a, s_mp_prime_tab[i], &res)) != MP_OKAY) { return err; } @@ -30,6 +26,8 @@ mp_err s_mp_prime_is_divisible(const mp_int *a, bool *result) } } + /* default to not */ + *result = false; return MP_OKAY; } #endif diff --git a/s_mp_sqr.c b/s_mp_sqr.c index 61106ed..4a20306 100644 --- a/s_mp_sqr.c +++ b/s_mp_sqr.c @@ -7,10 +7,8 @@ mp_err s_mp_sqr(const mp_int *a, mp_int *b) { mp_int t; - int ix, iy, pa; + int ix, pa; mp_err err; - mp_word r; - mp_digit u, tmpx, *tmpt; pa = a->used; if ((err = mp_init_size(&t, (2 * pa) + 1)) != MP_OKAY) { @@ -21,10 +19,13 @@ mp_err s_mp_sqr(const mp_int *a, mp_int *b) t.used = (2 * pa) + 1; for (ix = 0; ix < pa; ix++) { + mp_digit u; + int iy; + /* first calculate the digit at 2*ix */ /* calculate double precision result */ - r = (mp_word)t.dp[2*ix] + - ((mp_word)a->dp[ix] * (mp_word)a->dp[ix]); + mp_word r = (mp_word)t.dp[2*ix] + + ((mp_word)a->dp[ix] * (mp_word)a->dp[ix]); /* store lower part in result */ t.dp[ix+ix] = (mp_digit)(r & (mp_word)MP_MASK); @@ -32,32 +33,27 @@ mp_err s_mp_sqr(const mp_int *a, mp_int *b) /* get the carry */ u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); - /* left hand side of A[ix] * A[iy] */ - tmpx = a->dp[ix]; - - /* alias for where to store the results */ - tmpt = t.dp + ((2 * ix) + 1); - for (iy = ix + 1; iy < pa; iy++) { /* first calculate the product */ - r = (mp_word)tmpx * (mp_word)a->dp[iy]; + r = (mp_word)a->dp[ix] * (mp_word)a->dp[iy]; /* now calculate the double precision result, note we use * addition instead of *2 since it's easier to optimize */ - r = (mp_word)*tmpt + r + r + (mp_word)u; + r = (mp_word)t.dp[ix + iy] + r + r + (mp_word)u; /* store lower part */ - *tmpt++ = (mp_digit)(r & (mp_word)MP_MASK); + t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK); /* get carry */ u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); } /* propagate upwards */ while (u != 0uL) { - r = (mp_word)*tmpt + (mp_word)u; - *tmpt++ = (mp_digit)(r & (mp_word)MP_MASK); + r = (mp_word)t.dp[ix + iy] + (mp_word)u; + t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK); u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT); + ++iy; } } diff --git a/s_mp_sqr_fast.c b/s_mp_sqr_comba.c similarity index 71% rename from s_mp_sqr_fast.c rename to s_mp_sqr_comba.c index bcb1f5e..cb88dcc 100644 --- a/s_mp_sqr_fast.c +++ b/s_mp_sqr_comba.c @@ -1,5 +1,5 @@ #include "tommath_private.h" -#ifdef S_MP_SQR_FAST_C +#ifdef S_MP_SQR_COMBA_C /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ @@ -13,27 +13,24 @@ After that loop you do the squares and add them in. */ -mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) +mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b) { - int olduse, pa, ix, iz; - mp_digit W[MP_WARRAY], *tmpx; + int oldused, pa, ix; + mp_digit W[MP_WARRAY]; mp_word W1; - mp_err err; + mp_err err; /* grow the destination as required */ pa = a->used + a->used; - if (b->alloc < pa) { - if ((err = mp_grow(b, pa)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(b, pa)) != MP_OKAY) { + return err; } /* number of output digits to produce */ W1 = 0; for (ix = 0; ix < pa; ix++) { - int tx, ty, iy; + int tx, ty, iy, iz; mp_word _W; - mp_digit *tmpy; /* clear counter */ _W = 0; @@ -42,10 +39,6 @@ mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) ty = MP_MIN(a->used-1, ix); tx = ix - ty; - /* setup temp aliases */ - tmpx = a->dp + tx; - tmpy = a->dp + ty; - /* this is the number of times the loop will iterrate, essentially while (tx++ < a->used && ty-- >= 0) { ... } */ @@ -59,7 +52,7 @@ mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) /* execute loop */ for (iz = 0; iz < iy; iz++) { - _W += (mp_word)*tmpx++ * (mp_word)*tmpy--; + _W += (mp_word)a->dp[tx + iz] * (mp_word)a->dp[ty - iz]; } /* double the inner product and add carry */ @@ -78,19 +71,16 @@ mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) } /* setup dest */ - olduse = b->used; + oldused = b->used; b->used = a->used+a->used; - { - mp_digit *tmpb; - tmpb = b->dp; - for (ix = 0; ix < pa; ix++) { - *tmpb++ = W[ix] & MP_MASK; - } - - /* clear unused digits [that existed in the old copy of c] */ - MP_ZERO_DIGITS(tmpb, olduse - ix); + for (ix = 0; ix < pa; ix++) { + b->dp[ix] = W[ix] & MP_MASK; } + + /* clear unused digits [that existed in the old copy of c] */ + s_mp_zero_digs(b->dp + b->used, oldused - b->used); + mp_clamp(b); return MP_OKAY; } diff --git a/s_mp_karatsuba_sqr.c b/s_mp_sqr_karatsuba.c similarity index 55% rename from s_mp_karatsuba_sqr.c rename to s_mp_sqr_karatsuba.c index 7f22842..f064b46 100644 --- a/s_mp_karatsuba_sqr.c +++ b/s_mp_sqr_karatsuba.c @@ -1,20 +1,20 @@ #include "tommath_private.h" -#ifdef S_MP_KARATSUBA_SQR_C +#ifdef S_MP_SQR_KARATSUBA_C /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ /* Karatsuba squaring, computes b = a*a using three * half size squarings * - * See comments of karatsuba_mul for details. It + * See comments of mul_karatsuba for details. It * is essentially the same algorithm but merely * tuned to perform recursive squarings. */ -mp_err s_mp_karatsuba_sqr(const mp_int *a, mp_int *b) +mp_err s_mp_sqr_karatsuba(const mp_int *a, mp_int *b) { mp_int x0, x1, t1, t2, x0x0, x1x1; - int B; - mp_err err = MP_MEM; + int B; + mp_err err; /* min # of digits */ B = a->used; @@ -23,75 +23,57 @@ mp_err s_mp_karatsuba_sqr(const mp_int *a, mp_int *b) B = B >> 1; /* init copy all the temps */ - if (mp_init_size(&x0, B) != MP_OKAY) + if ((err = mp_init_size(&x0, B)) != MP_OKAY) goto LBL_ERR; - if (mp_init_size(&x1, a->used - B) != MP_OKAY) + if ((err = mp_init_size(&x1, a->used - B)) != MP_OKAY) goto X0; /* init temps */ - if (mp_init_size(&t1, a->used * 2) != MP_OKAY) + if ((err = mp_init_size(&t1, a->used * 2)) != MP_OKAY) goto X1; - if (mp_init_size(&t2, a->used * 2) != MP_OKAY) + if ((err = mp_init_size(&t2, a->used * 2)) != MP_OKAY) goto T1; - if (mp_init_size(&x0x0, B * 2) != MP_OKAY) + if ((err = mp_init_size(&x0x0, B * 2)) != MP_OKAY) goto T2; - if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY) + if ((err = mp_init_size(&x1x1, (a->used - B) * 2)) != MP_OKAY) goto X0X0; - { - int x; - mp_digit *dst, *src; - - src = a->dp; - - /* now shift the digits */ - dst = x0.dp; - for (x = 0; x < B; x++) { - *dst++ = *src++; - } - - dst = x1.dp; - for (x = B; x < a->used; x++) { - *dst++ = *src++; - } - } - + /* now shift the digits */ x0.used = B; x1.used = a->used - B; - + s_mp_copy_digs(x0.dp, a->dp, x0.used); + s_mp_copy_digs(x1.dp, a->dp + B, x1.used); mp_clamp(&x0); /* now calc the products x0*x0 and x1*x1 */ - if (mp_sqr(&x0, &x0x0) != MP_OKAY) + if ((err = mp_sqr(&x0, &x0x0)) != MP_OKAY) goto X1X1; /* x0x0 = x0*x0 */ - if (mp_sqr(&x1, &x1x1) != MP_OKAY) + if ((err = mp_sqr(&x1, &x1x1)) != MP_OKAY) goto X1X1; /* x1x1 = x1*x1 */ /* now calc (x1+x0)**2 */ - if (s_mp_add(&x1, &x0, &t1) != MP_OKAY) + if ((err = s_mp_add(&x1, &x0, &t1)) != MP_OKAY) goto X1X1; /* t1 = x1 - x0 */ - if (mp_sqr(&t1, &t1) != MP_OKAY) + if ((err = mp_sqr(&t1, &t1)) != MP_OKAY) goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ /* add x0y0 */ - if (s_mp_add(&x0x0, &x1x1, &t2) != MP_OKAY) + if ((err = s_mp_add(&x0x0, &x1x1, &t2)) != MP_OKAY) goto X1X1; /* t2 = x0x0 + x1x1 */ - if (s_mp_sub(&t1, &t2, &t1) != MP_OKAY) + if ((err = s_mp_sub(&t1, &t2, &t1)) != MP_OKAY) goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ /* shift by B */ - if (mp_lshd(&t1, B) != MP_OKAY) + if ((err = mp_lshd(&t1, B)) != MP_OKAY) goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<used - (3 * B)))) != MP_OKAY) goto LBL_ERRa2; + if ((err = mp_init_size(&a2, a->used - (2 * B))) != MP_OKAY) goto LBL_ERRa2; - tmpa = a->dp; - tmpc = a0.dp; - for (count = 0; count < B; count++) { - *tmpc++ = *tmpa++; - } - tmpc = a1.dp; - for (; count < (2 * B); count++) { - *tmpc++ = *tmpa++; - } - tmpc = a2.dp; - for (; count < a->used; count++) { - *tmpc++ = *tmpa++; - a2.used++; - } + a0.used = a1.used = B; + a2.used = a->used - 2 * B; + s_mp_copy_digs(a0.dp, a->dp, a0.used); + s_mp_copy_digs(a1.dp, a->dp + B, a1.used); + s_mp_copy_digs(a2.dp, a->dp + 2 * B, a2.used); mp_clamp(&a0); mp_clamp(&a1); mp_clamp(&a2); diff --git a/s_mp_sub.c b/s_mp_sub.c index bef1fce..b1a749e 100644 --- a/s_mp_sub.c +++ b/s_mp_sub.c @@ -6,64 +6,49 @@ /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ mp_err s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c) { - int olduse, min, max; + int oldused = c->used, min = b->used, max = a->used, i; + mp_digit u; mp_err err; - /* find sizes */ - min = b->used; - max = a->used; - /* init result */ - if (c->alloc < max) { - if ((err = mp_grow(c, max)) != MP_OKAY) { - return err; - } + if ((err = mp_grow(c, max)) != MP_OKAY) { + return err; } - olduse = c->used; + c->used = max; - { - mp_digit u, *tmpa, *tmpb, *tmpc; - int i; + /* set carry to zero */ + u = 0; + for (i = 0; i < min; i++) { + /* T[i] = A[i] - B[i] - U */ + c->dp[i] = (a->dp[i] - b->dp[i]) - u; - /* alias for digit pointers */ - tmpa = a->dp; - tmpb = b->dp; - tmpc = c->dp; + /* U = carry bit of T[i] + * Note this saves performing an AND operation since + * if a carry does occur it will propagate all the way to the + * MSB. As a result a single shift is enough to get the carry + */ + u = c->dp[i] >> (MP_SIZEOF_BITS(mp_digit) - 1u); - /* set carry to zero */ - u = 0; - for (i = 0; i < min; i++) { - /* T[i] = A[i] - B[i] - U */ - *tmpc = (*tmpa++ - *tmpb++) - u; - - /* U = carry bit of T[i] - * Note this saves performing an AND operation since - * if a carry does occur it will propagate all the way to the - * MSB. As a result a single shift is enough to get the carry - */ - u = *tmpc >> (MP_SIZEOF_BITS(mp_digit) - 1u); - - /* Clear carry from T[i] */ - *tmpc++ &= MP_MASK; - } - - /* now copy higher words if any, e.g. if A has more digits than B */ - for (; i < max; i++) { - /* T[i] = A[i] - U */ - *tmpc = *tmpa++ - u; - - /* U = carry bit of T[i] */ - u = *tmpc >> (MP_SIZEOF_BITS(mp_digit) - 1u); - - /* Clear carry from T[i] */ - *tmpc++ &= MP_MASK; - } - - /* clear digits above used (since we may not have grown result above) */ - MP_ZERO_DIGITS(tmpc, olduse - c->used); + /* Clear carry from T[i] */ + c->dp[i] &= MP_MASK; } + /* now copy higher words if any, e.g. if A has more digits than B */ + for (; i < max; i++) { + /* T[i] = A[i] - U */ + c->dp[i] = a->dp[i] - u; + + /* U = carry bit of T[i] */ + u = c->dp[i] >> (MP_SIZEOF_BITS(mp_digit) - 1u); + + /* Clear carry from T[i] */ + c->dp[i] &= MP_MASK; + } + + /* clear digits above used (since we may not have grown result above) */ + s_mp_zero_digs(c->dp + c->used, oldused - c->used); + mp_clamp(c); return MP_OKAY; } diff --git a/s_mp_zero_buf.c b/s_mp_zero_buf.c new file mode 100644 index 0000000..23a458d --- /dev/null +++ b/s_mp_zero_buf.c @@ -0,0 +1,22 @@ +#include "tommath_private.h" +#ifdef S_MP_ZERO_BUF_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +#ifdef MP_USE_MEMOPS +# include +#endif + +void s_mp_zero_buf(void *mem, size_t size) +{ +#ifdef MP_USE_MEMOPS + memset(mem, 0, size); +#else + char *m = (char *)mem; + while (size-- > 0u) { + *m++ = '\0'; + } +#endif +} + +#endif diff --git a/s_mp_zero_digs.c b/s_mp_zero_digs.c new file mode 100644 index 0000000..79e8377 --- /dev/null +++ b/s_mp_zero_digs.c @@ -0,0 +1,23 @@ +#include "tommath_private.h" +#ifdef S_MP_ZERO_DIGS_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +#ifdef MP_USE_MEMOPS +# include +#endif + +void s_mp_zero_digs(mp_digit *d, int digits) +{ +#ifdef MP_USE_MEMOPS + if (digits > 0) { + memset(d, 0, (size_t)digits * sizeof(mp_digit)); + } +#else + while (digits-- > 0) { + *d++ = 0; + } +#endif +} + +#endif diff --git a/tommath.def b/tommath.def index db0aba4..4d81e34 100644 --- a/tommath.def +++ b/tommath.def @@ -72,7 +72,6 @@ EXPORTS mp_lshd mp_mod mp_mod_2d - mp_mod_d mp_montgomery_calc_normalization mp_montgomery_reduce mp_montgomery_setup diff --git a/tommath.h b/tommath.h index 86b19d3..d024a33 100644 --- a/tommath.h +++ b/tommath.h @@ -63,7 +63,7 @@ typedef uint32_t mp_digit; # ifdef MP_31BIT /* * This is an extension that uses 31-bit digits. - * Please be aware that not all functions support this size, especially s_mp_mul_digs_fast + * Please be aware that not all functions support this size, especially s_mp_mul_comba * will be reduced to work on small numbers only: * Up to 8 limbs, 248 bits instead of up to 512 limbs, 15872 bits with MP_28BIT. */ @@ -117,10 +117,10 @@ typedef enum { /* tunable cutoffs */ #ifndef MP_FIXED_CUTOFFS extern int -MP_KARATSUBA_MUL_CUTOFF, -MP_KARATSUBA_SQR_CUTOFF, -MP_TOOM_MUL_CUTOFF, -MP_TOOM_SQR_CUTOFF; +MP_MUL_KARATSUBA_CUTOFF, +MP_SQR_KARATSUBA_CUTOFF, +MP_MUL_TOOM_CUTOFF, +MP_SQR_TOOM_CUTOFF; #endif /* define this to use lower memory usage routines (exptmods mostly) */ @@ -398,7 +398,7 @@ mp_err mp_mul_d(const mp_int *a, mp_digit b, mp_int *c) MP_WUR; mp_err mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d) MP_WUR; /* c = a mod b, 0 <= c < b */ -mp_err mp_mod_d(const mp_int *a, mp_digit b, mp_digit *c) MP_WUR; +#define mp_mod_d(a, b, c) mp_div_d((a), (b), NULL, (c)) /* ---> number theory <--- */ diff --git a/tommath_class.h b/tommath_class.h index 3a3d549..2a5380a 100644 --- a/tommath_class.h +++ b/tommath_class.h @@ -77,7 +77,6 @@ # define MP_LSHD_C # define MP_MOD_C # define MP_MOD_2D_C -# define MP_MOD_D_C # define MP_MONTGOMERY_CALC_NORMALIZATION_C # define MP_MONTGOMERY_REDUCE_C # define MP_MONTGOMERY_SETUP_C @@ -141,33 +140,36 @@ # define MP_XOR_C # define MP_ZERO_C # define S_MP_ADD_C -# define S_MP_BALANCE_MUL_C +# define S_MP_COPY_DIGS_C # define S_MP_DIV_RECURSIVE_C # define S_MP_DIV_SCHOOL_C # define S_MP_DIV_SMALL_C # define S_MP_EXPTMOD_C # define S_MP_EXPTMOD_FAST_C # define S_MP_GET_BIT_C -# define S_MP_INVMOD_FAST_C -# define S_MP_INVMOD_SLOW_C -# define S_MP_KARATSUBA_MUL_C -# define S_MP_KARATSUBA_SQR_C +# define S_MP_INVMOD_C +# define S_MP_INVMOD_ODD_C # define S_MP_LOG_C # define S_MP_LOG_D_C # define S_MP_LOG_POW2_C -# define S_MP_MONTGOMERY_REDUCE_FAST_C -# define S_MP_MUL_DIGS_C -# define S_MP_MUL_DIGS_FAST_C -# define S_MP_MUL_HIGH_DIGS_C -# define S_MP_MUL_HIGH_DIGS_FAST_C +# define S_MP_MONTGOMERY_REDUCE_COMBA_C +# define S_MP_MUL_C +# define S_MP_MUL_BALANCE_C +# define S_MP_MUL_COMBA_C +# define S_MP_MUL_HIGH_C +# define S_MP_MUL_HIGH_COMBA_C +# define S_MP_MUL_KARATSUBA_C +# define S_MP_MUL_TOOM_C # define S_MP_PRIME_IS_DIVISIBLE_C # define S_MP_RAND_JENKINS_C # define S_MP_RAND_PLATFORM_C # define S_MP_SQR_C -# define S_MP_SQR_FAST_C +# define S_MP_SQR_COMBA_C +# define S_MP_SQR_KARATSUBA_C +# define S_MP_SQR_TOOM_C # define S_MP_SUB_C -# define S_MP_TOOM_MUL_C -# define S_MP_TOOM_SQR_C +# define S_MP_ZERO_BUF_C +# define S_MP_ZERO_DIGS_C #endif #endif #if defined(MP_2EXPT_C) @@ -189,6 +191,7 @@ # define MP_CLAMP_C # define MP_GROW_C # define MP_SUB_D_C +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_ADDMOD_C) @@ -207,6 +210,7 @@ #endif #if defined(MP_CLEAR_C) +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_CLEAR_MULTI_C) @@ -227,12 +231,13 @@ #endif #if defined(MP_COMPLEMENT_C) -# define MP_NEG_C # define MP_SUB_D_C #endif #if defined(MP_COPY_C) # define MP_GROW_C +# define S_MP_COPY_DIGS_C +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_COUNT_BITS_C) @@ -253,6 +258,7 @@ #if defined(MP_DIV_2_C) # define MP_CLAMP_C # define MP_GROW_C +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_DIV_2D_C) @@ -260,7 +266,6 @@ # define MP_COPY_C # define MP_MOD_2D_C # define MP_RSHD_C -# define MP_ZERO_C #endif #if defined(MP_DIV_3_C) @@ -288,6 +293,7 @@ # define MP_CMP_MAG_C # define MP_GROW_C # define S_MP_SUB_C +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_DR_SETUP_C) @@ -351,6 +357,7 @@ #if defined(MP_FWRITE_C) # define MP_RADIX_SIZE_C # define MP_TO_RADIX_C +# define S_MP_ZERO_BUF_C #endif #if defined(MP_GCD_C) @@ -397,6 +404,7 @@ #endif #if defined(MP_GROW_C) +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_INIT_C) @@ -463,17 +471,17 @@ #if defined(MP_INVMOD_C) # define MP_CMP_D_C -# define S_MP_INVMOD_FAST_C -# define S_MP_INVMOD_SLOW_C +# define S_MP_INVMOD_C +# define S_MP_INVMOD_ODD_C #endif #if defined(MP_IS_SQUARE_C) # define MP_CLEAR_C # define MP_CMP_MAG_C +# define MP_DIV_D_C # define MP_GET_I32_C # define MP_INIT_U32_C # define MP_MOD_C -# define MP_MOD_D_C # define MP_SQRT_C # define MP_SQR_C #endif @@ -506,6 +514,7 @@ #if defined(MP_LSHD_C) # define MP_GROW_C +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_MOD_C) @@ -520,10 +529,7 @@ # define MP_CLAMP_C # define MP_COPY_C # define MP_ZERO_C -#endif - -#if defined(MP_MOD_D_C) -# define MP_DIV_D_C +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_MONTGOMERY_CALC_NORMALIZATION_C) @@ -540,7 +546,7 @@ # define MP_CMP_MAG_C # define MP_GROW_C # define MP_RSHD_C -# define S_MP_MONTGOMERY_REDUCE_FAST_C +# define S_MP_MONTGOMERY_REDUCE_COMBA_C # define S_MP_SUB_C #endif @@ -548,15 +554,16 @@ #endif #if defined(MP_MUL_C) -# define S_MP_BALANCE_MUL_C -# define S_MP_KARATSUBA_MUL_C -# define S_MP_MUL_DIGS_C -# define S_MP_MUL_DIGS_FAST_C -# define S_MP_TOOM_MUL_C +# define S_MP_MUL_BALANCE_C +# define S_MP_MUL_C +# define S_MP_MUL_COMBA_C +# define S_MP_MUL_KARATSUBA_C +# define S_MP_MUL_TOOM_C #endif #if defined(MP_MUL_2_C) # define MP_GROW_C +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_MUL_2D_C) @@ -569,6 +576,7 @@ #if defined(MP_MUL_D_C) # define MP_CLAMP_C # define MP_GROW_C +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_MULMOD_C) @@ -660,8 +668,8 @@ # define MP_ADD_D_C # define MP_CLEAR_C # define MP_CMP_D_C +# define MP_DIV_D_C # define MP_INIT_C -# define MP_MOD_D_C # define MP_PRIME_IS_PRIME_C # define MP_SET_C # define MP_SUB_D_C @@ -678,6 +686,7 @@ # define MP_PRIME_IS_PRIME_C # define MP_SUB_D_C # define S_MP_RAND_SOURCE_C +# define S_MP_ZERO_BUF_C #endif #if defined(MP_PRIME_STRONG_LUCAS_SELFRIDGE_C) @@ -744,9 +753,9 @@ # define MP_RSHD_C # define MP_SET_C # define MP_SUB_C -# define S_MP_MUL_DIGS_C -# define S_MP_MUL_HIGH_DIGS_C -# define S_MP_MUL_HIGH_DIGS_FAST_C +# define S_MP_MUL_C +# define S_MP_MUL_HIGH_C +# define S_MP_MUL_HIGH_COMBA_C # define S_MP_SUB_C #endif @@ -820,6 +829,7 @@ #if defined(MP_RSHD_C) # define MP_ZERO_C +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_SBIN_SIZE_C) @@ -827,6 +837,7 @@ #endif #if defined(MP_SET_C) +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_SET_DOUBLE_C) @@ -852,15 +863,19 @@ #endif #if defined(MP_SET_U32_C) +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_SET_U64_C) +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_SET_UL_C) +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_SET_ULL_C) +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_SHRINK_C) @@ -873,10 +888,10 @@ #endif #if defined(MP_SQR_C) -# define S_MP_KARATSUBA_SQR_C # define S_MP_SQR_C -# define S_MP_SQR_FAST_C -# define S_MP_TOOM_SQR_C +# define S_MP_SQR_COMBA_C +# define S_MP_SQR_KARATSUBA_C +# define S_MP_SQR_TOOM_C #endif #if defined(MP_SQRMOD_C) @@ -905,10 +920,10 @@ # define MP_CMP_D_C # define MP_COPY_C # define MP_DIV_2_C +# define MP_DIV_D_C # define MP_EXPTMOD_C # define MP_INIT_MULTI_C # define MP_KRONECKER_C -# define MP_MOD_D_C # define MP_MULMOD_C # define MP_SET_C # define MP_SQRMOD_C @@ -926,6 +941,7 @@ # define MP_ADD_D_C # define MP_CLAMP_C # define MP_GROW_C +# define S_MP_ZERO_DIGS_C #endif #if defined(MP_SUBMOD_C) @@ -969,23 +985,16 @@ #endif #if defined(MP_ZERO_C) +# define S_MP_ZERO_DIGS_C #endif #if defined(S_MP_ADD_C) # define MP_CLAMP_C # define MP_GROW_C +# define S_MP_ZERO_DIGS_C #endif -#if defined(S_MP_BALANCE_MUL_C) -# define MP_ADD_C -# define MP_CLAMP_C -# define MP_CLEAR_C -# define MP_CLEAR_MULTI_C -# define MP_EXCH_C -# define MP_INIT_MULTI_C -# define MP_INIT_SIZE_C -# define MP_LSHD_C -# define MP_MUL_C +#if defined(S_MP_COPY_DIGS_C) #endif #if defined(S_MP_DIV_RECURSIVE_C) @@ -1075,13 +1084,13 @@ # define MP_REDUCE_2K_SETUP_C # define MP_SET_C # define MP_SQR_C -# define S_MP_MONTGOMERY_REDUCE_FAST_C +# define S_MP_MONTGOMERY_REDUCE_COMBA_C #endif #if defined(S_MP_GET_BIT_C) #endif -#if defined(S_MP_INVMOD_FAST_C) +#if defined(S_MP_INVMOD_C) # define MP_ADD_C # define MP_CLEAR_MULTI_C # define MP_CMP_C @@ -1096,7 +1105,7 @@ # define MP_SUB_C #endif -#if defined(S_MP_INVMOD_SLOW_C) +#if defined(S_MP_INVMOD_ODD_C) # define MP_ADD_C # define MP_CLEAR_MULTI_C # define MP_CMP_C @@ -1111,28 +1120,6 @@ # define MP_SUB_C #endif -#if defined(S_MP_KARATSUBA_MUL_C) -# define MP_ADD_C -# define MP_CLAMP_C -# define MP_CLEAR_C -# define MP_INIT_SIZE_C -# define MP_LSHD_C -# define MP_MUL_C -# define S_MP_ADD_C -# define S_MP_SUB_C -#endif - -#if defined(S_MP_KARATSUBA_SQR_C) -# define MP_ADD_C -# define MP_CLAMP_C -# define MP_CLEAR_C -# define MP_INIT_SIZE_C -# define MP_LSHD_C -# define MP_SQR_C -# define S_MP_ADD_C -# define S_MP_SUB_C -#endif - #if defined(S_MP_LOG_C) # define MP_CLEAR_MULTI_C # define MP_CMP_C @@ -1153,41 +1140,86 @@ # define MP_COUNT_BITS_C #endif -#if defined(S_MP_MONTGOMERY_REDUCE_FAST_C) +#if defined(S_MP_MONTGOMERY_REDUCE_COMBA_C) # define MP_CLAMP_C # define MP_CMP_MAG_C # define MP_GROW_C # define S_MP_SUB_C +# define S_MP_ZERO_BUF_C +# define S_MP_ZERO_DIGS_C #endif -#if defined(S_MP_MUL_DIGS_C) +#if defined(S_MP_MUL_C) # define MP_CLAMP_C # define MP_CLEAR_C # define MP_EXCH_C # define MP_INIT_SIZE_C -# define S_MP_MUL_DIGS_FAST_C +# define S_MP_MUL_COMBA_C #endif -#if defined(S_MP_MUL_DIGS_FAST_C) +#if defined(S_MP_MUL_BALANCE_C) +# define MP_ADD_C +# define MP_CLAMP_C +# define MP_CLEAR_C +# define MP_CLEAR_MULTI_C +# define MP_EXCH_C +# define MP_INIT_MULTI_C +# define MP_INIT_SIZE_C +# define MP_LSHD_C +# define MP_MUL_C +# define S_MP_COPY_DIGS_C +#endif + +#if defined(S_MP_MUL_COMBA_C) # define MP_CLAMP_C # define MP_GROW_C +# define S_MP_ZERO_DIGS_C #endif -#if defined(S_MP_MUL_HIGH_DIGS_C) +#if defined(S_MP_MUL_HIGH_C) # define MP_CLAMP_C # define MP_CLEAR_C # define MP_EXCH_C # define MP_INIT_SIZE_C -# define S_MP_MUL_HIGH_DIGS_FAST_C +# define S_MP_MUL_HIGH_COMBA_C #endif -#if defined(S_MP_MUL_HIGH_DIGS_FAST_C) +#if defined(S_MP_MUL_HIGH_COMBA_C) # define MP_CLAMP_C # define MP_GROW_C +# define S_MP_ZERO_DIGS_C +#endif + +#if defined(S_MP_MUL_KARATSUBA_C) +# define MP_ADD_C +# define MP_CLAMP_C +# define MP_CLEAR_C +# define MP_INIT_SIZE_C +# define MP_LSHD_C +# define MP_MUL_C +# define S_MP_ADD_C +# define S_MP_COPY_DIGS_C +# define S_MP_SUB_C +#endif + +#if defined(S_MP_MUL_TOOM_C) +# define MP_ADD_C +# define MP_CLAMP_C +# define MP_CLEAR_C +# define MP_CLEAR_MULTI_C +# define MP_DIV_2_C +# define MP_DIV_3_C +# define MP_INIT_MULTI_C +# define MP_INIT_SIZE_C +# define MP_LSHD_C +# define MP_MUL_2_C +# define MP_MUL_C +# define MP_SUB_C +# define S_MP_COPY_DIGS_C #endif #if defined(S_MP_PRIME_IS_DIVISIBLE_C) -# define MP_MOD_D_C +# define MP_DIV_D_C #endif #if defined(S_MP_RAND_JENKINS_C) @@ -1204,32 +1236,25 @@ # define MP_INIT_SIZE_C #endif -#if defined(S_MP_SQR_FAST_C) +#if defined(S_MP_SQR_COMBA_C) # define MP_CLAMP_C # define MP_GROW_C +# define S_MP_ZERO_DIGS_C #endif -#if defined(S_MP_SUB_C) -# define MP_CLAMP_C -# define MP_GROW_C -#endif - -#if defined(S_MP_TOOM_MUL_C) +#if defined(S_MP_SQR_KARATSUBA_C) # define MP_ADD_C # define MP_CLAMP_C # define MP_CLEAR_C -# define MP_CLEAR_MULTI_C -# define MP_DIV_2_C -# define MP_DIV_3_C -# define MP_INIT_MULTI_C # define MP_INIT_SIZE_C # define MP_LSHD_C -# define MP_MUL_2_C -# define MP_MUL_C -# define MP_SUB_C +# define MP_SQR_C +# define S_MP_ADD_C +# define S_MP_COPY_DIGS_C +# define S_MP_SUB_C #endif -#if defined(S_MP_TOOM_SQR_C) +#if defined(S_MP_SQR_TOOM_C) # define MP_ADD_C # define MP_CLAMP_C # define MP_CLEAR_C @@ -1241,6 +1266,19 @@ # define MP_MUL_C # define MP_SQR_C # define MP_SUB_C +# define S_MP_COPY_DIGS_C +#endif + +#if defined(S_MP_SUB_C) +# define MP_CLAMP_C +# define MP_GROW_C +# define S_MP_ZERO_DIGS_C +#endif + +#if defined(S_MP_ZERO_BUF_C) +#endif + +#if defined(S_MP_ZERO_DIGS_C) #endif #ifdef LTM_INSIDE diff --git a/tommath_cutoffs.h b/tommath_cutoffs.h index a65a9b3..fb84160 100644 --- a/tommath_cutoffs.h +++ b/tommath_cutoffs.h @@ -7,7 +7,7 @@ on the aforementioned machine for example. */ -#define MP_DEFAULT_KARATSUBA_MUL_CUTOFF 80 -#define MP_DEFAULT_KARATSUBA_SQR_CUTOFF 120 -#define MP_DEFAULT_TOOM_MUL_CUTOFF 350 -#define MP_DEFAULT_TOOM_SQR_CUTOFF 400 +#define MP_DEFAULT_MUL_KARATSUBA_CUTOFF 80 +#define MP_DEFAULT_SQR_KARATSUBA_CUTOFF 120 +#define MP_DEFAULT_MUL_TOOM_CUTOFF 350 +#define MP_DEFAULT_SQR_TOOM_CUTOFF 400 diff --git a/tommath_private.h b/tommath_private.h index f2989d4..0969796 100644 --- a/tommath_private.h +++ b/tommath_private.h @@ -42,55 +42,25 @@ * define MP_NO_ZERO_ON_FREE during compilation. */ #ifdef MP_NO_ZERO_ON_FREE -# define MP_FREE_BUFFER(mem, size) MP_FREE((mem), (size)) -# define MP_FREE_DIGITS(mem, digits) MP_FREE((mem), sizeof (mp_digit) * (size_t)(digits)) +# define MP_FREE_BUF(mem, size) MP_FREE((mem), (size)) +# define MP_FREE_DIGS(mem, digits) MP_FREE((mem), sizeof (mp_digit) * (size_t)(digits)) #else -# define MP_FREE_BUFFER(mem, size) \ +# define MP_FREE_BUF(mem, size) \ do { \ size_t fs_ = (size); \ void* fm_ = (mem); \ if (fm_ != NULL) { \ - MP_ZERO_BUFFER(fm_, fs_); \ + s_mp_zero_buf(fm_, fs_); \ MP_FREE(fm_, fs_); \ } \ } while (0) -# define MP_FREE_DIGITS(mem, digits) \ +# define MP_FREE_DIGS(mem, digits) \ do { \ int fd_ = (digits); \ - void* fm_ = (mem); \ + mp_digit* fm_ = (mem); \ if (fm_ != NULL) { \ - size_t fs_ = sizeof (mp_digit) * (size_t)fd_; \ - MP_ZERO_BUFFER(fm_, fs_); \ - MP_FREE(fm_, fs_); \ - } \ -} while (0) -#endif - -#ifdef MP_USE_MEMSET -# include -# define MP_ZERO_BUFFER(mem, size) memset((mem), 0, (size)) -# define MP_ZERO_DIGITS(mem, digits) \ -do { \ - int zd_ = (digits); \ - if (zd_ > 0) { \ - memset((mem), 0, sizeof(mp_digit) * (size_t)zd_); \ - } \ -} while (0) -#else -# define MP_ZERO_BUFFER(mem, size) \ -do { \ - size_t zs_ = (size); \ - char* zm_ = (char*)(mem); \ - while (zs_-- > 0u) { \ - *zm_++ = '\0'; \ - } \ -} while (0) -# define MP_ZERO_DIGITS(mem, digits) \ -do { \ - int zd_ = (digits); \ - mp_digit* zm_ = (mem); \ - while (zd_-- > 0) { \ - *zm_++ = 0; \ + s_mp_zero_digs(fm_, fd_); \ + MP_FREE(fm_, sizeof (mp_digit) * (size_t)fd_); \ } \ } while (0) #endif @@ -112,10 +82,10 @@ do { \ #ifdef MP_FIXED_CUTOFFS # include "tommath_cutoffs.h" -# define MP_KARATSUBA_MUL_CUTOFF MP_DEFAULT_KARATSUBA_MUL_CUTOFF -# define MP_KARATSUBA_SQR_CUTOFF MP_DEFAULT_KARATSUBA_SQR_CUTOFF -# define MP_TOOM_MUL_CUTOFF MP_DEFAULT_TOOM_MUL_CUTOFF -# define MP_TOOM_SQR_CUTOFF MP_DEFAULT_TOOM_SQR_CUTOFF +# define MP_MUL_KARATSUBA_CUTOFF MP_DEFAULT_MUL_KARATSUBA_CUTOFF +# define MP_SQR_KARATSUBA_CUTOFF MP_DEFAULT_SQR_KARATSUBA_CUTOFF +# define MP_MUL_TOOM_CUTOFF MP_DEFAULT_MUL_TOOM_CUTOFF +# define MP_SQR_TOOM_CUTOFF MP_DEFAULT_SQR_TOOM_CUTOFF #endif /* define heap macros */ @@ -188,23 +158,23 @@ MP_STATIC_ASSERT(prec_geq_min_prec, MP_PREC >= MP_MIN_PREC) extern MP_PRIVATE mp_err(*s_mp_rand_source)(void *out, size_t size); /* lowlevel functions, do not call! */ -MP_PRIVATE bool s_mp_get_bit(const mp_int *a, unsigned int b); +MP_PRIVATE bool s_mp_get_bit(const mp_int *a, int b); MP_PRIVATE mp_err s_mp_add(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; MP_PRIVATE mp_err s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; -MP_PRIVATE mp_err s_mp_mul_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR; -MP_PRIVATE mp_err s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR; -MP_PRIVATE mp_err s_mp_mul_high_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR; -MP_PRIVATE mp_err s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR; -MP_PRIVATE mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) MP_WUR; +MP_PRIVATE mp_err s_mp_mul_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR; +MP_PRIVATE mp_err s_mp_mul(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR; +MP_PRIVATE mp_err s_mp_mul_high_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR; +MP_PRIVATE mp_err s_mp_mul_high(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR; +MP_PRIVATE mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b) MP_WUR; MP_PRIVATE mp_err s_mp_sqr(const mp_int *a, mp_int *b) MP_WUR; -MP_PRIVATE mp_err s_mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; -MP_PRIVATE mp_err s_mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; -MP_PRIVATE mp_err s_mp_toom_mul(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; -MP_PRIVATE mp_err s_mp_karatsuba_sqr(const mp_int *a, mp_int *b) MP_WUR; -MP_PRIVATE mp_err s_mp_toom_sqr(const mp_int *a, mp_int *b) MP_WUR; -MP_PRIVATE mp_err s_mp_invmod_fast(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; -MP_PRIVATE mp_err s_mp_invmod_slow(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; -MP_PRIVATE mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho) MP_WUR; +MP_PRIVATE mp_err s_mp_mul_balance(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; +MP_PRIVATE mp_err s_mp_mul_karatsuba(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; +MP_PRIVATE mp_err s_mp_mul_toom(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; +MP_PRIVATE mp_err s_mp_sqr_karatsuba(const mp_int *a, mp_int *b) MP_WUR; +MP_PRIVATE mp_err s_mp_sqr_toom(const mp_int *a, mp_int *b) MP_WUR; +MP_PRIVATE mp_err s_mp_invmod_odd(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; +MP_PRIVATE mp_err s_mp_invmod(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR; +MP_PRIVATE mp_err s_mp_montgomery_reduce_comba(mp_int *x, const mp_int *n, mp_digit rho) MP_WUR; MP_PRIVATE mp_err s_mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode) MP_WUR; MP_PRIVATE mp_err s_mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode) MP_WUR; MP_PRIVATE mp_err s_mp_rand_platform(void *p, size_t n) MP_WUR; @@ -215,6 +185,9 @@ MP_PRIVATE uint32_t s_mp_log_pow2(const mp_int *a, uint32_t base); MP_PRIVATE mp_err s_mp_div_recursive(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r); MP_PRIVATE mp_err s_mp_div_school(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d); MP_PRIVATE mp_err s_mp_div_small(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d); +MP_PRIVATE void s_mp_zero_buf(void *mem, size_t size); +MP_PRIVATE void s_mp_zero_digs(mp_digit *d, int digits); +MP_PRIVATE void s_mp_copy_digs(mp_digit *d, const mp_digit *s, int digits); /* TODO: jenkins prng is not thread safe as of now */ MP_PRIVATE mp_err s_mp_rand_jenkins(void *p, size_t n) MP_WUR; @@ -247,7 +220,7 @@ extern MP_PRIVATE const mp_digit s_mp_prime_tab[]; } \ a->used = i; \ a->sign = MP_ZPOS; \ - MP_ZERO_DIGITS(a->dp + a->used, a->alloc - a->used); \ + s_mp_zero_digs(a->dp + a->used, a->alloc - a->used); \ } #define MP_SET_SIGNED(name, uname, type, utype) \ diff --git a/tommath_superclass.h b/tommath_superclass.h index 6961a59..db927d6 100644 --- a/tommath_superclass.h +++ b/tommath_superclass.h @@ -76,23 +76,23 @@ * like removing support for even moduli, etc... */ # ifdef LTM_LAST -# undef MP_DR_IS_MODULUS_C -# undef MP_DR_SETUP_C -# undef MP_DR_REDUCE_C # undef MP_DIV_3_C -# undef MP_REDUCE_2K_SETUP_C +# undef MP_DR_IS_MODULUS_C +# undef MP_DR_REDUCE_C +# undef MP_DR_SETUP_C # undef MP_REDUCE_2K_C +# undef MP_REDUCE_2K_SETUP_C # undef MP_REDUCE_IS_2K_C # undef MP_REDUCE_SETUP_C -# undef S_MP_BALANCE_MUL_C # undef S_MP_EXPTMOD_C -# undef S_MP_INVMOD_FAST_C -# undef S_MP_KARATSUBA_MUL_C -# undef S_MP_KARATSUBA_SQR_C -# undef S_MP_MUL_HIGH_DIGS_C -# undef S_MP_MUL_HIGH_DIGS_FAST_C -# undef S_MP_TOOM_MUL_C -# undef S_MP_TOOM_SQR_C +# undef S_MP_INVMOD_ODD_C +# undef S_MP_MUL_BALANCE_C +# undef S_MP_MUL_HIGH_C +# undef S_MP_MUL_HIGH_COMBA_C +# undef S_MP_MUL_KARATSUBA_C +# undef S_MP_MUL_TOOM_C +# undef S_MP_SQR_KARATSUBA_C +# undef S_MP_SQR_TOOM_C # ifndef SC_RSA_1_WITH_TESTS # undef MP_REDUCE_C @@ -104,7 +104,7 @@ * trouble. */ # undef MP_MONTGOMERY_REDUCE_C -# undef S_MP_MUL_DIGS_C +# undef S_MP_MUL_C # undef S_MP_SQR_C # endif