Merge pull request #450 from libtom/sqr-opt

make mp_sqr and mp_div_3 internal and add optimizations
This commit is contained in:
Steffen Jaeckel 2019-11-10 15:30:57 +01:00 committed by GitHub
commit 0bc5c3292d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 144 additions and 157 deletions

View File

@ -1241,14 +1241,14 @@ LBL_ERR:
return EXIT_FAILURE;
}
static int test_mp_div_3(void)
static int test_s_mp_div_3(void)
{
int cnt;
mp_int a, b, c, d, e;
DOR(mp_init_multi(&a, &b, &c, &d, &e, NULL));
/* test mp_div_3 */
/* test s_mp_div_3 */
mp_set(&d, 3u);
for (cnt = 0; cnt < 10000;) {
mp_digit r2;
@ -1259,10 +1259,10 @@ static int test_mp_div_3(void)
}
DO(mp_rand(&a, (abs(rand_int()) % 128) + 1));
DO(mp_div(&a, &d, &b, &e));
DO(mp_div_3(&a, &c, &r2));
DO(s_mp_div_3(&a, &c, &r2));
if (mp_cmp(&b, &c) || mp_cmp_d(&e, r2)) {
printf("\nmp_div_3 => Failure\n");
printf("\ns_mp_div_3 => Failure\n");
goto LBL_ERR;
}
}
@ -2297,7 +2297,7 @@ static int unit_tests(int argc, char **argv)
T1(mp_cnt_lsb, MP_CNT_LSB),
T1(mp_complement, MP_COMPLEMENT),
T1(mp_decr, MP_SUB_D),
T1(mp_div_3, MP_DIV_3),
T1(s_mp_div_3, S_MP_DIV_3),
T1(mp_dr_reduce, MP_DR_REDUCE),
T2(mp_pack_unpack,MP_PACK, MP_UNPACK),
T2(mp_fread_fwrite, MP_FREAD, MP_FWRITE),

View File

@ -2605,14 +2605,6 @@ mp_err mp_incr(mp_int *a);
mp_err mp_decr(mp_int *a);
\end{alltt}
The division by three can be made faster by replacing the division with a multiplication by the
multiplicative inverse of three.
\index{mp\_div\_3}
\begin{alltt}
mp_err mp_div_3(const mp_int *a, mp_int *c, mp_digit *d);
\end{alltt}
\chapter{Little Helpers}
It is never wrong to have some useful little shortcuts at hand.
\section{Function Macros}

View File

@ -392,10 +392,6 @@
RelativePath="mp_div_2d.c"
>
</File>
<File
RelativePath="mp_div_3.c"
>
</File>
<File
RelativePath="mp_div_d.c"
>
@ -760,10 +756,6 @@
RelativePath="mp_signed_rsh.c"
>
</File>
<File
RelativePath="mp_sqr.c"
>
</File>
<File
RelativePath="mp_sqrmod.c"
>
@ -824,6 +816,10 @@
RelativePath="s_mp_copy_digs.c"
>
</File>
<File
RelativePath="s_mp_div_3.c"
>
</File>
<File
RelativePath="s_mp_div_recursive.c"
>

View File

@ -28,7 +28,7 @@ LCOV_ARGS=--directory .
#START_INS
OBJECTS=mp_2expt.o mp_abs.o mp_add.o mp_add_d.o mp_addmod.o mp_and.o mp_clamp.o mp_clear.o mp_clear_multi.o \
mp_cmp.o mp_cmp_d.o mp_cmp_mag.o mp_cnt_lsb.o mp_complement.o mp_copy.o mp_count_bits.o mp_cutoffs.o \
mp_div.o mp_div_2.o mp_div_2d.o mp_div_3.o mp_div_d.o mp_dr_is_modulus.o mp_dr_reduce.o mp_dr_setup.o \
mp_div.o mp_div_2.o mp_div_2d.o mp_div_d.o mp_dr_is_modulus.o mp_dr_reduce.o mp_dr_setup.o \
mp_error_to_string.o mp_exch.o mp_expt_u32.o mp_exptmod.o mp_exteuclid.o mp_fread.o mp_from_sbin.o \
mp_from_ubin.o mp_fwrite.o mp_gcd.o mp_get_double.o mp_get_i32.o mp_get_i64.o mp_get_l.o mp_get_ll.o \
mp_get_mag_u32.o mp_get_mag_u64.o mp_get_mag_ul.o mp_get_mag_ull.o mp_grow.o mp_init.o mp_init_copy.o \
@ -42,11 +42,11 @@ mp_prime_strong_lucas_selfridge.o mp_radix_size.o mp_rand.o mp_read_radix.o mp_r
mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o mp_reduce_is_2k_l.o \
mp_reduce_setup.o mp_root_u32.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o \
mp_set_l.o mp_set_ll.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_set_ull.o mp_shrink.o mp_signed_rsh.o \
mp_sqr.o mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o \
mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_recursive.o \
s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod.o \
s_mp_invmod_odd.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o \
mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o \
s_mp_invmod.o s_mp_invmod_odd.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_comba.o \
s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_rand_jenkins.o \
s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o \
s_mp_zero_buf.o s_mp_zero_digs.o

View File

@ -30,7 +30,7 @@ LIBMAIN_D =libtommath.dll
#List of objects to compile (all goes to libtommath.a)
OBJECTS=mp_2expt.o mp_abs.o mp_add.o mp_add_d.o mp_addmod.o mp_and.o mp_clamp.o mp_clear.o mp_clear_multi.o \
mp_cmp.o mp_cmp_d.o mp_cmp_mag.o mp_cnt_lsb.o mp_complement.o mp_copy.o mp_count_bits.o mp_cutoffs.o \
mp_div.o mp_div_2.o mp_div_2d.o mp_div_3.o mp_div_d.o mp_dr_is_modulus.o mp_dr_reduce.o mp_dr_setup.o \
mp_div.o mp_div_2.o mp_div_2d.o mp_div_d.o mp_dr_is_modulus.o mp_dr_reduce.o mp_dr_setup.o \
mp_error_to_string.o mp_exch.o mp_expt_u32.o mp_exptmod.o mp_exteuclid.o mp_fread.o mp_from_sbin.o \
mp_from_ubin.o mp_fwrite.o mp_gcd.o mp_get_double.o mp_get_i32.o mp_get_i64.o mp_get_l.o mp_get_ll.o \
mp_get_mag_u32.o mp_get_mag_u64.o mp_get_mag_ul.o mp_get_mag_ull.o mp_grow.o mp_init.o mp_init_copy.o \
@ -44,11 +44,11 @@ mp_prime_strong_lucas_selfridge.o mp_radix_size.o mp_rand.o mp_read_radix.o mp_r
mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o mp_reduce_is_2k_l.o \
mp_reduce_setup.o mp_root_u32.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o \
mp_set_l.o mp_set_ll.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_set_ull.o mp_shrink.o mp_signed_rsh.o \
mp_sqr.o mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o \
mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_recursive.o \
s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod.o \
s_mp_invmod_odd.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o \
mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o \
s_mp_invmod.o s_mp_invmod_odd.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_comba.o \
s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_rand_jenkins.o \
s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o \
s_mp_zero_buf.o s_mp_zero_digs.o

View File

@ -23,7 +23,7 @@ LIBMAIN_S =tommath.lib
#List of objects to compile (all goes to tommath.lib)
OBJECTS=mp_2expt.obj mp_abs.obj mp_add.obj mp_add_d.obj mp_addmod.obj mp_and.obj mp_clamp.obj mp_clear.obj mp_clear_multi.obj \
mp_cmp.obj mp_cmp_d.obj mp_cmp_mag.obj mp_cnt_lsb.obj mp_complement.obj mp_copy.obj mp_count_bits.obj mp_cutoffs.obj \
mp_div.obj mp_div_2.obj mp_div_2d.obj mp_div_3.obj mp_div_d.obj mp_dr_is_modulus.obj mp_dr_reduce.obj mp_dr_setup.obj \
mp_div.obj mp_div_2.obj mp_div_2d.obj mp_div_d.obj mp_dr_is_modulus.obj mp_dr_reduce.obj mp_dr_setup.obj \
mp_error_to_string.obj mp_exch.obj mp_expt_u32.obj mp_exptmod.obj mp_exteuclid.obj mp_fread.obj mp_from_sbin.obj \
mp_from_ubin.obj mp_fwrite.obj mp_gcd.obj mp_get_double.obj mp_get_i32.obj mp_get_i64.obj mp_get_l.obj mp_get_ll.obj \
mp_get_mag_u32.obj mp_get_mag_u64.obj mp_get_mag_ul.obj mp_get_mag_ull.obj mp_grow.obj mp_init.obj mp_init_copy.obj \
@ -37,11 +37,11 @@ mp_prime_strong_lucas_selfridge.obj mp_radix_size.obj mp_rand.obj mp_read_radix.
mp_reduce_2k_l.obj mp_reduce_2k_setup.obj mp_reduce_2k_setup_l.obj mp_reduce_is_2k.obj mp_reduce_is_2k_l.obj \
mp_reduce_setup.obj mp_root_u32.obj mp_rshd.obj mp_sbin_size.obj mp_set.obj mp_set_double.obj mp_set_i32.obj mp_set_i64.obj \
mp_set_l.obj mp_set_ll.obj mp_set_u32.obj mp_set_u64.obj mp_set_ul.obj mp_set_ull.obj mp_shrink.obj mp_signed_rsh.obj \
mp_sqr.obj mp_sqrmod.obj mp_sqrt.obj mp_sqrtmod_prime.obj mp_sub.obj mp_sub_d.obj mp_submod.obj mp_to_radix.obj mp_to_sbin.obj \
mp_to_ubin.obj mp_ubin_size.obj mp_unpack.obj mp_xor.obj mp_zero.obj s_mp_add.obj s_mp_copy_digs.obj s_mp_div_recursive.obj \
s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj s_mp_get_bit.obj s_mp_invmod.obj \
s_mp_invmod_odd.obj s_mp_log.obj s_mp_log_d.obj s_mp_log_pow2.obj s_mp_montgomery_reduce_comba.obj s_mp_mul.obj \
s_mp_mul_balance.obj s_mp_mul_comba.obj s_mp_mul_high.obj s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj \
mp_sqrmod.obj mp_sqrt.obj mp_sqrtmod_prime.obj mp_sub.obj mp_sub_d.obj mp_submod.obj mp_to_radix.obj mp_to_sbin.obj \
mp_to_ubin.obj mp_ubin_size.obj mp_unpack.obj mp_xor.obj mp_zero.obj s_mp_add.obj s_mp_copy_digs.obj s_mp_div_3.obj \
s_mp_div_recursive.obj s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj s_mp_get_bit.obj \
s_mp_invmod.obj s_mp_invmod_odd.obj s_mp_log.obj s_mp_log_d.obj s_mp_log_pow2.obj s_mp_montgomery_reduce_comba.obj \
s_mp_mul.obj s_mp_mul_balance.obj s_mp_mul_comba.obj s_mp_mul_high.obj s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj \
s_mp_mul_toom.obj s_mp_prime_is_divisible.obj s_mp_prime_tab.obj s_mp_radix_map.obj s_mp_rand_jenkins.obj \
s_mp_rand_platform.obj s_mp_sqr.obj s_mp_sqr_comba.obj s_mp_sqr_karatsuba.obj s_mp_sqr_toom.obj s_mp_sub.obj \
s_mp_zero_buf.obj s_mp_zero_digs.obj

View File

@ -25,7 +25,7 @@ LCOV_ARGS=--directory .libs --directory .
#START_INS
OBJECTS=mp_2expt.o mp_abs.o mp_add.o mp_add_d.o mp_addmod.o mp_and.o mp_clamp.o mp_clear.o mp_clear_multi.o \
mp_cmp.o mp_cmp_d.o mp_cmp_mag.o mp_cnt_lsb.o mp_complement.o mp_copy.o mp_count_bits.o mp_cutoffs.o \
mp_div.o mp_div_2.o mp_div_2d.o mp_div_3.o mp_div_d.o mp_dr_is_modulus.o mp_dr_reduce.o mp_dr_setup.o \
mp_div.o mp_div_2.o mp_div_2d.o mp_div_d.o mp_dr_is_modulus.o mp_dr_reduce.o mp_dr_setup.o \
mp_error_to_string.o mp_exch.o mp_expt_u32.o mp_exptmod.o mp_exteuclid.o mp_fread.o mp_from_sbin.o \
mp_from_ubin.o mp_fwrite.o mp_gcd.o mp_get_double.o mp_get_i32.o mp_get_i64.o mp_get_l.o mp_get_ll.o \
mp_get_mag_u32.o mp_get_mag_u64.o mp_get_mag_ul.o mp_get_mag_ull.o mp_grow.o mp_init.o mp_init_copy.o \
@ -39,11 +39,11 @@ mp_prime_strong_lucas_selfridge.o mp_radix_size.o mp_rand.o mp_read_radix.o mp_r
mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o mp_reduce_is_2k_l.o \
mp_reduce_setup.o mp_root_u32.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o \
mp_set_l.o mp_set_ll.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_set_ull.o mp_shrink.o mp_signed_rsh.o \
mp_sqr.o mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o \
mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_recursive.o \
s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod.o \
s_mp_invmod_odd.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o \
mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o \
s_mp_invmod.o s_mp_invmod_odd.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_comba.o \
s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_rand_jenkins.o \
s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o \
s_mp_zero_buf.o s_mp_zero_digs.o

View File

@ -31,7 +31,7 @@ LIBMAIN_S = libtommath.a
OBJECTS=mp_2expt.o mp_abs.o mp_add.o mp_add_d.o mp_addmod.o mp_and.o mp_clamp.o mp_clear.o mp_clear_multi.o \
mp_cmp.o mp_cmp_d.o mp_cmp_mag.o mp_cnt_lsb.o mp_complement.o mp_copy.o mp_count_bits.o mp_cutoffs.o \
mp_div.o mp_div_2.o mp_div_2d.o mp_div_3.o mp_div_d.o mp_dr_is_modulus.o mp_dr_reduce.o mp_dr_setup.o \
mp_div.o mp_div_2.o mp_div_2d.o mp_div_d.o mp_dr_is_modulus.o mp_dr_reduce.o mp_dr_setup.o \
mp_error_to_string.o mp_exch.o mp_expt_u32.o mp_exptmod.o mp_exteuclid.o mp_fread.o mp_from_sbin.o \
mp_from_ubin.o mp_fwrite.o mp_gcd.o mp_get_double.o mp_get_i32.o mp_get_i64.o mp_get_l.o mp_get_ll.o \
mp_get_mag_u32.o mp_get_mag_u64.o mp_get_mag_ul.o mp_get_mag_ull.o mp_grow.o mp_init.o mp_init_copy.o \
@ -45,11 +45,11 @@ mp_prime_strong_lucas_selfridge.o mp_radix_size.o mp_rand.o mp_read_radix.o mp_r
mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o mp_reduce_is_2k_l.o \
mp_reduce_setup.o mp_root_u32.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o \
mp_set_l.o mp_set_ll.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_set_ull.o mp_shrink.o mp_signed_rsh.o \
mp_sqr.o mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o \
mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_recursive.o \
s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod.o \
s_mp_invmod_odd.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o \
mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o \
s_mp_invmod.o s_mp_invmod_odd.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_comba.o \
s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_rand_jenkins.o \
s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o \
s_mp_zero_buf.o s_mp_zero_digs.o

View File

@ -28,7 +28,13 @@ mp_err mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d)
}
/* power of two ? */
if ((b & (b - 1u)) == 0u) {
if (MP_HAS(MP_DIV_2) && (b == 2u)) {
if (d != NULL) {
*d = mp_isodd(a) ? 1u : 0u;
}
return (c == NULL) ? MP_OKAY : mp_div_2(a, c);
}
if (MP_HAS(MP_DIV_2D) && MP_IS_2EXPT(b)) {
ix = 1;
while ((ix < MP_DIGIT_BIT) && (b != (((mp_digit)1)<<ix))) {
ix++;
@ -36,15 +42,12 @@ mp_err mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d)
if (d != NULL) {
*d = a->dp[0] & (((mp_digit)1<<(mp_digit)ix) - 1uL);
}
if (c != NULL) {
return mp_div_2d(a, ix, c, NULL);
}
return MP_OKAY;
return (c == NULL) ? MP_OKAY : mp_div_2d(a, ix, c, NULL);
}
/* three? */
if (MP_HAS(MP_DIV_3) && (b == 3u)) {
return mp_div_3(a, c, d);
if (MP_HAS(S_MP_DIV_3) && (b == 3u)) {
return s_mp_div_3(a, c, d);
}
/* no easy answer [c'est la vie]. Just division */

View File

@ -17,7 +17,7 @@ mp_err mp_log_u32(const mp_int *a, uint32_t base, uint32_t *c)
return MP_VAL;
}
if (MP_HAS(S_MP_LOG_POW2) && ((base & (base - 1u)) == 0u)) {
if (MP_HAS(S_MP_LOG_POW2) && MP_IS_2EXPT(base)) {
*c = s_mp_log_pow2(a, base);
return MP_OKAY;
}

View File

@ -12,18 +12,34 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
digs = a->used + b->used + 1;
mp_sign neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
if (MP_HAS(S_MP_MUL_BALANCE) &&
/* Check sizes. The smaller one needs to be larger than the Karatsuba cut-off.
* The bigger one needs to be at least about one MP_MUL_KARATSUBA_CUTOFF bigger
* to make some sense, but it depends on architecture, OS, position of the
* stars... so YMMV.
* Using it to cut the input into slices small enough for s_mp_mul_comba
* was actually slower on the author's machine, but YMMV.
*/
(min >= MP_MUL_KARATSUBA_CUTOFF) &&
((max / 2) >= MP_MUL_KARATSUBA_CUTOFF) &&
/* Not much effect was observed below a ratio of 1:2, but again: YMMV. */
(max >= (2 * min))) {
if ((a == b) &&
MP_HAS(S_MP_SQR_TOOM) && /* use Toom-Cook? */
(a->used >= MP_SQR_TOOM_CUTOFF)) {
err = s_mp_sqr_toom(a, c);
} else if ((a == b) &&
MP_HAS(S_MP_SQR_KARATSUBA) && /* Karatsuba? */
(a->used >= MP_SQR_KARATSUBA_CUTOFF)) {
err = s_mp_sqr_karatsuba(a, c);
} else if ((a == b) &&
MP_HAS(S_MP_SQR_COMBA) && /* can we use the fast comba multiplier? */
(((a->used * 2) + 1) < MP_WARRAY) &&
(a->used < (MP_MAX_COMBA / 2))) {
err = s_mp_sqr_comba(a, c);
} else if ((a == b) &&
MP_HAS(S_MP_SQR)) {
err = s_mp_sqr(a, c);
} else if (MP_HAS(S_MP_MUL_BALANCE) &&
/* Check sizes. The smaller one needs to be larger than the Karatsuba cut-off.
* The bigger one needs to be at least about one MP_MUL_KARATSUBA_CUTOFF bigger
* to make some sense, but it depends on architecture, OS, position of the
* stars... so YMMV.
* Using it to cut the input into slices small enough for s_mp_mul_comba
* was actually slower on the author's machine, but YMMV.
*/
(min >= MP_MUL_KARATSUBA_CUTOFF) &&
((max / 2) >= MP_MUL_KARATSUBA_CUTOFF) &&
/* Not much effect was observed below a ratio of 1:2, but again: YMMV. */
(max >= (2 * min))) {
err = s_mp_mul_balance(a,b,c);
} else if (MP_HAS(S_MP_MUL_TOOM) &&
(min >= MP_MUL_TOOM_CUTOFF)) {

View File

@ -10,6 +10,22 @@ mp_err mp_mul_d(const mp_int *a, mp_digit b, mp_int *c)
mp_err err;
int ix, oldused;
if (b == 1u) {
return mp_copy(a, c);
}
/* power of two ? */
if (MP_HAS(MP_MUL_2) && (b == 2u)) {
return mp_mul_2(a, c);
}
if (MP_HAS(MP_MUL_2D) && MP_IS_2EXPT(b)) {
ix = 1;
while ((ix < MP_DIGIT_BIT) && (b != (((mp_digit)1)<<ix))) {
ix++;
}
return mp_mul_2d(a, ix, c);
}
/* make sure c is big enough to hold a*b */
if ((err = mp_grow(c, a->used + 1)) != MP_OKAY) {
return err;

View File

@ -1,28 +0,0 @@
#include "tommath_private.h"
#ifdef MP_SQR_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis */
/* SPDX-License-Identifier: Unlicense */
/*
 * Computes b = a*a.
 *
 * The squaring back ends are tried in a fixed order and the first one that
 * is both compiled in (MP_HAS) and accepted by its size test is used:
 *   1. s_mp_sqr_toom      when a->used >= MP_SQR_TOOM_CUTOFF
 *   2. s_mp_sqr_karatsuba when a->used >= MP_SQR_KARATSUBA_CUTOFF
 *   3. s_mp_sqr_comba     when ((a->used * 2) + 1) < MP_WARRAY and
 *                         a->used < (MP_MAX_COMBA / 2)
 *   4. s_mp_sqr           otherwise
 *
 * Returns the error code of the selected back end, or MP_VAL if none of
 * them could be selected.  The sign of b is set to MP_ZPOS on every path,
 * including the error paths.
 */
mp_err mp_sqr(const mp_int *a, mp_int *b)
{
   /* remains MP_VAL only if no branch below is taken */
   mp_err err = MP_VAL;

   if (MP_HAS(S_MP_SQR_TOOM) &&
       (a->used >= MP_SQR_TOOM_CUTOFF)) {
      /* Toom-Cook squaring */
      err = s_mp_sqr_toom(a, b);
   } else if (MP_HAS(S_MP_SQR_KARATSUBA) &&
              (a->used >= MP_SQR_KARATSUBA_CUTOFF)) {
      /* Karatsuba squaring */
      err = s_mp_sqr_karatsuba(a, b);
   } else if (MP_HAS(S_MP_SQR_COMBA) &&
              (((a->used * 2) + 1) < MP_WARRAY) &&
              (a->used < (MP_MAX_COMBA / 2))) {
      /* fast comba squarer, guarded by the two size limits above */
      err = s_mp_sqr_comba(a, b);
   } else if (MP_HAS(S_MP_SQR)) {
      /* baseline squaring routine */
      err = s_mp_sqr(a, b);
   }

   /* a square is never negative */
   b->sign = MP_ZPOS;
   return err;
}
#endif

View File

@ -1,10 +1,10 @@
#include "tommath_private.h"
#ifdef MP_DIV_3_C
#ifdef S_MP_DIV_3_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis */
/* SPDX-License-Identifier: Unlicense */
/* divide by three (based on routine from MPI and the GMP manual) */
mp_err mp_div_3(const mp_int *a, mp_int *c, mp_digit *d)
mp_err s_mp_div_3(const mp_int *a, mp_int *c, mp_digit *d)
{
mp_int q;
mp_word w;

View File

@ -133,7 +133,7 @@ mp_err s_mp_mul_toom(const mp_int *a, const mp_int *b, mp_int *c)
if ((err = mp_sub(&S2, &a1, &S2)) != MP_OKAY) goto LBL_ERR;
/** S2 = S2 / 3; \\ this is an exact division */
if ((err = mp_div_3(&S2, &S2, NULL)) != MP_OKAY) goto LBL_ERR;
if ((err = s_mp_div_3(&S2, &S2, NULL)) != MP_OKAY) goto LBL_ERR;
/** a1 = S1 - a1; */
if ((err = mp_sub(&S1, &a1, &a1)) != MP_OKAY) goto LBL_ERR;

View File

@ -25,7 +25,6 @@ EXPORTS
mp_div
mp_div_2
mp_div_2d
mp_div_3
mp_div_d
mp_dr_is_modulus
mp_dr_reduce
@ -117,7 +116,6 @@ EXPORTS
mp_set_ull
mp_shrink
mp_signed_rsh
mp_sqr
mp_sqrmod
mp_sqrt
mp_sqrtmod_prime

View File

@ -300,9 +300,6 @@ mp_err mp_div_2d(const mp_int *a, int b, mp_int *c, mp_int *d) MP_WUR;
/* b = a/2 */
mp_err mp_div_2(const mp_int *a, mp_int *b) MP_WUR;
/* a/3 => 3c + d == a */
mp_err mp_div_3(const mp_int *a, mp_int *c, mp_digit *d) MP_WUR;
/* c = a * 2**b, implemented as c = a << b */
mp_err mp_mul_2d(const mp_int *a, int b, mp_int *c) MP_WUR;
@ -366,7 +363,7 @@ mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
/* b = a*a */
mp_err mp_sqr(const mp_int *a, mp_int *b) MP_WUR;
#define mp_sqr(a, b) mp_mul((a), (a), (b))
/* a/b => cb + d == a */
mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) MP_WUR;

View File

@ -31,7 +31,6 @@
# define MP_DIV_C
# define MP_DIV_2_C
# define MP_DIV_2D_C
# define MP_DIV_3_C
# define MP_DIV_D_C
# define MP_DR_IS_MODULUS_C
# define MP_DR_REDUCE_C
@ -123,7 +122,6 @@
# define MP_SET_ULL_C
# define MP_SHRINK_C
# define MP_SIGNED_RSH_C
# define MP_SQR_C
# define MP_SQRMOD_C
# define MP_SQRT_C
# define MP_SQRTMOD_PRIME_C
@ -139,6 +137,7 @@
# define MP_ZERO_C
# define S_MP_ADD_C
# define S_MP_COPY_DIGS_C
# define S_MP_DIV_3_C
# define S_MP_DIV_RECURSIVE_C
# define S_MP_DIV_SCHOOL_C
# define S_MP_DIV_SMALL_C
@ -266,21 +265,15 @@
# define MP_RSHD_C
#endif
#if defined(MP_DIV_3_C)
# define MP_CLAMP_C
# define MP_CLEAR_C
# define MP_EXCH_C
# define MP_INIT_SIZE_C
#endif
#if defined(MP_DIV_D_C)
# define MP_CLAMP_C
# define MP_CLEAR_C
# define MP_COPY_C
# define MP_DIV_2D_C
# define MP_DIV_3_C
# define MP_DIV_2_C
# define MP_EXCH_C
# define MP_INIT_SIZE_C
# define S_MP_DIV_3_C
#endif
#if defined(MP_DR_IS_MODULUS_C)
@ -308,7 +301,6 @@
# define MP_INIT_COPY_C
# define MP_MUL_C
# define MP_SET_C
# define MP_SQR_C
#endif
#if defined(MP_EXPTMOD_C)
@ -480,8 +472,8 @@
# define MP_GET_I32_C
# define MP_INIT_U32_C
# define MP_MOD_C
# define MP_MUL_C
# define MP_SQRT_C
# define MP_SQR_C
#endif
#if defined(MP_KRONECKER_C)
@ -554,6 +546,10 @@
# define S_MP_MUL_COMBA_C
# define S_MP_MUL_KARATSUBA_C
# define S_MP_MUL_TOOM_C
# define S_MP_SQR_C
# define S_MP_SQR_COMBA_C
# define S_MP_SQR_KARATSUBA_C
# define S_MP_SQR_TOOM_C
#endif
#if defined(MP_MUL_2_C)
@ -570,7 +566,10 @@
#if defined(MP_MUL_D_C)
# define MP_CLAMP_C
# define MP_COPY_C
# define MP_GROW_C
# define MP_MUL_2D_C
# define MP_MUL_2_C
# define S_MP_ZERO_DIGS_C
#endif
@ -703,7 +702,6 @@
# define MP_SET_C
# define MP_SET_I32_C
# define MP_SET_U32_C
# define MP_SQR_C
# define MP_SUB_C
# define MP_SUB_D_C
# define S_MP_GET_BIT_C
@ -873,16 +871,9 @@
# define MP_SUB_D_C
#endif
#if defined(MP_SQR_C)
# define S_MP_SQR_C
# define S_MP_SQR_COMBA_C
# define S_MP_SQR_KARATSUBA_C
# define S_MP_SQR_TOOM_C
#endif
#if defined(MP_SQRMOD_C)
# define MP_MOD_C
# define MP_SQR_C
# define MP_MUL_C
#endif
#if defined(MP_SQRT_C)
@ -978,6 +969,13 @@
#if defined(S_MP_COPY_DIGS_C)
#endif
#if defined(S_MP_DIV_3_C)
# define MP_CLAMP_C
# define MP_CLEAR_C
# define MP_EXCH_C
# define MP_INIT_SIZE_C
#endif
#if defined(S_MP_DIV_RECURSIVE_C)
# define MP_ADD_C
# define MP_CLEAR_MULTI_C
@ -1043,7 +1041,6 @@
# define MP_REDUCE_C
# define MP_REDUCE_SETUP_C
# define MP_SET_C
# define MP_SQR_C
#endif
#if defined(S_MP_EXPTMOD_FAST_C)
@ -1063,7 +1060,6 @@
# define MP_REDUCE_2K_C
# define MP_REDUCE_2K_SETUP_C
# define MP_SET_C
# define MP_SQR_C
# define S_MP_MONTGOMERY_REDUCE_COMBA_C
#endif
@ -1110,7 +1106,6 @@
# define MP_INIT_MULTI_C
# define MP_MUL_C
# define MP_SET_C
# define MP_SQR_C
#endif
#if defined(S_MP_LOG_D_C)
@ -1188,7 +1183,6 @@
# define MP_CLEAR_C
# define MP_CLEAR_MULTI_C
# define MP_DIV_2_C
# define MP_DIV_3_C
# define MP_INIT_MULTI_C
# define MP_INIT_SIZE_C
# define MP_LSHD_C
@ -1196,6 +1190,7 @@
# define MP_MUL_C
# define MP_SUB_C
# define S_MP_COPY_DIGS_C
# define S_MP_DIV_3_C
#endif
#if defined(S_MP_PRIME_IS_DIVISIBLE_C)
@ -1234,7 +1229,7 @@
# define MP_CLEAR_C
# define MP_INIT_SIZE_C
# define MP_LSHD_C
# define MP_SQR_C
# define MP_MUL_C
# define S_MP_ADD_C
# define S_MP_COPY_DIGS_C
# define S_MP_SUB_C
@ -1250,7 +1245,6 @@
# define MP_LSHD_C
# define MP_MUL_2_C
# define MP_MUL_C
# define MP_SQR_C
# define MP_SUB_C
# define S_MP_COPY_DIGS_C
#endif

View File

@ -120,6 +120,8 @@ extern void MP_FREE(void *mem, size_t size);
#define MP_EXCH(t, a, b) do { t _c = a; a = b; b = _c; } while (0)
/* Evaluates to 1 if x is a power of two (non-zero with exactly one bit set),
 * and to 0 otherwise.  x & (x - 1) clears the lowest set bit, so the result
 * is zero only when at most one bit was set; the zero check excludes x == 0.
 * NOTE: x is expanded more than once - do not pass expressions with side effects. */
#define MP_IS_2EXPT(x) (!(((x) == 0u) || (((x) & ((x) - 1u)) != 0u)))
/* Static assertion */
#define MP_STATIC_ASSERT(msg, cond) typedef char mp_static_assert_##msg[(cond) ? 1 : -1];
@ -158,36 +160,37 @@ MP_STATIC_ASSERT(prec_geq_min_prec, MP_PREC >= MP_MIN_PREC)
extern MP_PRIVATE mp_err(*s_mp_rand_source)(void *out, size_t size);
/* lowlevel functions, do not call! */
MP_PRIVATE bool s_mp_get_bit(const mp_int *a, int b);
MP_PRIVATE bool s_mp_get_bit(const mp_int *a, int b) MP_WUR;
MP_PRIVATE mp_digit s_mp_log_d(mp_digit base, mp_digit n) MP_WUR;
MP_PRIVATE mp_err s_mp_add(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
MP_PRIVATE mp_err s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
MP_PRIVATE mp_err s_mp_mul_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
MP_PRIVATE mp_err s_mp_div_3(const mp_int *a, mp_int *c, mp_digit *d) MP_WUR;
MP_PRIVATE mp_err s_mp_div_recursive(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r) MP_WUR;
MP_PRIVATE mp_err s_mp_div_school(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) MP_WUR;
MP_PRIVATE mp_err s_mp_div_small(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) MP_WUR;
MP_PRIVATE mp_err s_mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode) MP_WUR;
MP_PRIVATE mp_err s_mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode) MP_WUR;
MP_PRIVATE mp_err s_mp_invmod(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
MP_PRIVATE mp_err s_mp_invmod_odd(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
MP_PRIVATE mp_err s_mp_log(const mp_int *a, uint32_t base, uint32_t *c) MP_WUR;
MP_PRIVATE mp_err s_mp_montgomery_reduce_comba(mp_int *x, const mp_int *n, mp_digit rho) MP_WUR;
MP_PRIVATE mp_err s_mp_mul(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
MP_PRIVATE mp_err s_mp_mul_high_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
MP_PRIVATE mp_err s_mp_mul_high(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
MP_PRIVATE mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b) MP_WUR;
MP_PRIVATE mp_err s_mp_sqr(const mp_int *a, mp_int *b) MP_WUR;
MP_PRIVATE mp_err s_mp_mul_balance(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
MP_PRIVATE mp_err s_mp_mul_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
MP_PRIVATE mp_err s_mp_mul_high(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
MP_PRIVATE mp_err s_mp_mul_high_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
MP_PRIVATE mp_err s_mp_mul_karatsuba(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
MP_PRIVATE mp_err s_mp_mul_toom(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
MP_PRIVATE mp_err s_mp_prime_is_divisible(const mp_int *a, bool *result) MP_WUR;
MP_PRIVATE mp_err s_mp_rand_platform(void *p, size_t n) MP_WUR;
MP_PRIVATE mp_err s_mp_sqr(const mp_int *a, mp_int *b) MP_WUR;
MP_PRIVATE mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b) MP_WUR;
MP_PRIVATE mp_err s_mp_sqr_karatsuba(const mp_int *a, mp_int *b) MP_WUR;
MP_PRIVATE mp_err s_mp_sqr_toom(const mp_int *a, mp_int *b) MP_WUR;
MP_PRIVATE mp_err s_mp_invmod_odd(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
MP_PRIVATE mp_err s_mp_invmod(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
MP_PRIVATE mp_err s_mp_montgomery_reduce_comba(mp_int *x, const mp_int *n, mp_digit rho) MP_WUR;
MP_PRIVATE mp_err s_mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode) MP_WUR;
MP_PRIVATE mp_err s_mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode) MP_WUR;
MP_PRIVATE mp_err s_mp_rand_platform(void *p, size_t n) MP_WUR;
MP_PRIVATE mp_err s_mp_prime_is_divisible(const mp_int *a, bool *result);
MP_PRIVATE mp_digit s_mp_log_d(mp_digit base, mp_digit n);
MP_PRIVATE mp_err s_mp_log(const mp_int *a, uint32_t base, uint32_t *c);
MP_PRIVATE uint32_t s_mp_log_pow2(const mp_int *a, uint32_t base);
MP_PRIVATE mp_err s_mp_div_recursive(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r);
MP_PRIVATE mp_err s_mp_div_school(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d);
MP_PRIVATE mp_err s_mp_div_small(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d);
MP_PRIVATE mp_err s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
MP_PRIVATE uint32_t s_mp_log_pow2(const mp_int *a, uint32_t base) MP_WUR;
MP_PRIVATE void s_mp_copy_digs(mp_digit *d, const mp_digit *s, int digits);
MP_PRIVATE void s_mp_zero_buf(void *mem, size_t size);
MP_PRIVATE void s_mp_zero_digs(mp_digit *d, int digits);
MP_PRIVATE void s_mp_copy_digs(mp_digit *d, const mp_digit *s, int digits);
/* TODO: jenkins prng is not thread safe as of now */
MP_PRIVATE mp_err s_mp_rand_jenkins(void *p, size_t n) MP_WUR;

View File

@ -75,7 +75,6 @@
* like removing support for even moduli, etc...
*/
# ifdef LTM_LAST
# undef MP_DIV_3_C
# undef MP_DR_IS_MODULUS_C
# undef MP_DR_REDUCE_C
# undef MP_DR_SETUP_C
@ -83,6 +82,7 @@
# undef MP_REDUCE_2K_SETUP_C
# undef MP_REDUCE_IS_2K_C
# undef MP_REDUCE_SETUP_C
# undef S_MP_DIV_3_C
# undef S_MP_EXPTMOD_C
# undef S_MP_INVMOD_ODD_C
# undef S_MP_MUL_BALANCE_C