From 9edd185f66ba92b1061aefa05c7925e68ffe18b5 Mon Sep 17 00:00:00 2001 From: czurnieden Date: Fri, 4 Oct 2019 17:41:09 +0200 Subject: [PATCH] Addition of fast division (recursive divrem only) --- demo/test.c | 135 +++++++++++++++++++++++ libtommath_VS2008.vcproj | 12 +++ makefile | 12 +-- makefile.mingw | 12 +-- makefile.msvc | 12 +-- makefile.shared | 12 +-- makefile.unix | 12 +-- mp_div.c | 225 ++------------------------------------- s_mp_div_recursive.c | 182 +++++++++++++++++++++++++++++++ s_mp_div_school.c | 158 +++++++++++++++++++++++++++ s_mp_div_small.c | 51 +++++++++ tommath_class.h | 74 ++++++++++--- tommath_private.h | 5 + tommath_superclass.h | 2 +- 14 files changed, 641 insertions(+), 263 deletions(-) create mode 100644 s_mp_div_recursive.c create mode 100644 s_mp_div_school.c create mode 100644 s_mp_div_small.c diff --git a/demo/test.c b/demo/test.c index 838f0ed..bfe934b 100644 --- a/demo/test.c +++ b/demo/test.c @@ -2327,6 +2327,139 @@ LBL_ERR: } +/* Some larger values to test the fast division algorithm */ +static int test_s_mp_div_recursive(void) +{ + mp_int a, b, c_q, c_r, d_q, d_r; + int size, err; + + if ((err = mp_init_multi(&a, &b, &c_q, &c_r, &d_q, &d_r, NULL)) != MP_OKAY) { + goto LBL_ERR; + } + + for (size = MP_KARATSUBA_MUL_CUTOFF; size < 3 * MP_KARATSUBA_MUL_CUTOFF; size += 10) { + fprintf(stderr,"sizes = %d / %d\n", 10 * size, size); + /* Relation 10:1 */ + if ((err = mp_rand(&a, 10 * size)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = mp_rand(&b, size)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = s_mp_div_recursive(&a, &b, &c_q, &c_r)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = s_mp_div_school(&a, &b, &d_q, &d_r)) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_cmp(&c_q, &d_q) != MP_EQ) { + fprintf(stderr, "1. Recursive division failed at sizes %d / %d, wrong quotient\n", + 10 * size, size); + goto LBL_ERR; + } + if (mp_cmp(&c_r, &d_r) != MP_EQ) { + fprintf(stderr, "1. 
Recursive division failed at sizes %d / %d, wrong remainder\n", + 10 * size, size); + goto LBL_ERR; + } + fprintf(stderr,"sizes = %d / %d\n", 2 * size, size); + /* Relation 2:1 */ + if ((err = mp_rand(&a, 2 * size)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = mp_rand(&b, size)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = s_mp_div_recursive(&a, &b, &c_q, &c_r)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = s_mp_div_school(&a, &b, &d_q, &d_r)) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_cmp(&c_q, &d_q) != MP_EQ) { + fprintf(stderr, "2. Recursive division failed at sizes %d / %d, wrong quotient\n", + 2 * size, size); + goto LBL_ERR; + } + if (mp_cmp(&c_r, &d_r) != MP_EQ) { + fprintf(stderr, "2. Recursive division failed at sizes %d / %d, wrong remainder\n", + 2 * size, size); + goto LBL_ERR; + } + fprintf(stderr,"sizes = %d / %d\n", 3 * size, 2 * size); + /* Upper limit 3:2 */ + if ((err = mp_rand(&a, 3 * size)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = mp_rand(&b, 2 * size)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = s_mp_div_recursive(&a, &b, &c_q, &c_r)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = s_mp_div_school(&a, &b, &d_q, &d_r)) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_cmp(&c_q, &d_q) != MP_EQ) { + fprintf(stderr, "3. Recursive division failed at sizes %d / %d, wrong quotient\n", + 3 * size, 2 * size); + goto LBL_ERR; + } + if (mp_cmp(&c_r, &d_r) != MP_EQ) { + fprintf(stderr, "3. 
Recursive division failed at sizes %d / %d, wrong remainder\n", + 3 * size, 2 * size); + goto LBL_ERR; + } + } + + mp_clear_multi(&a, &b, &c_q, &c_r, &d_q, &d_r, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c_q, &c_r, &d_q, &d_r, NULL); + return EXIT_FAILURE; +} + +static int test_s_mp_div_small(void) +{ + mp_int a, b, c_q, c_r, d_q, d_r; + int size, err; + + if ((err = mp_init_multi(&a, &b, &c_q, &c_r, &d_q, &d_r, NULL)) != MP_OKAY) { + goto LBL_ERR; + } + for (size = 1; size < MP_KARATSUBA_MUL_CUTOFF; size += 10) { + fprintf(stderr,"sizes = %d / %d\n", 2 * size, size); + /* Relation 2:1 */ + if ((err = mp_rand(&a, 2 * size)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = mp_rand(&b, size)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = s_mp_div_small(&a, &b, &c_q, &c_r)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = s_mp_div_school(&a, &b, &d_q, &d_r)) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_cmp(&c_q, &d_q) != MP_EQ) { + fprintf(stderr, "1. Small division failed at sizes %d / %d, wrong quotient\n", + 2 * size, size); + goto LBL_ERR; + } + if (mp_cmp(&c_r, &d_r) != MP_EQ) { + fprintf(stderr, "1. 
Small division failed at sizes %d / %d, wrong remainder\n", + 2 * size, size); + goto LBL_ERR; + } + } + mp_clear_multi(&a, &b, &c_q, &c_r, &d_q, &d_r, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c_q, &c_r, &d_q, &d_r, NULL); + return EXIT_FAILURE; +} + static int test_mp_read_write_ubin(void) { @@ -2500,6 +2633,8 @@ static int unit_tests(int argc, char **argv) T1(mp_sqrt, MP_SQRT), T1(mp_sqrtmod_prime, MP_SQRTMOD_PRIME), T1(mp_xor, MP_XOR), + T2(s_mp_div_recursive, S_MP_DIV_RECURSIVE, S_MP_DIV_SCHOOL), + T2(s_mp_div_small, S_MP_DIV_SMALL, S_MP_DIV_SCHOOL), T1(s_mp_balance_mul, S_MP_BALANCE_MUL), T1(s_mp_karatsuba_mul, S_MP_KARATSUBA_MUL), T1(s_mp_karatsuba_sqr, S_MP_KARATSUBA_SQR), diff --git a/libtommath_VS2008.vcproj b/libtommath_VS2008.vcproj index d59f71c..481b542 100644 --- a/libtommath_VS2008.vcproj +++ b/libtommath_VS2008.vcproj @@ -852,6 +852,18 @@ RelativePath="s_mp_balance_mul.c" > + + + + + + diff --git a/makefile b/makefile index f713a85..7715004 100644 --- a/makefile +++ b/makefile @@ -44,12 +44,12 @@ mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o mp_reduce_is_2k_l. 
mp_root_u32.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o mp_set_l.o \ mp_set_ll.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_set_ull.o mp_shrink.o mp_signed_rsh.o mp_sqr.o \ mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o \ -mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_balance_mul.o s_mp_exptmod.o \ -s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod_fast.o s_mp_invmod_slow.o s_mp_karatsuba_mul.o \ -s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o s_mp_montgomery_reduce_fast.o s_mp_mul_digs.o \ -s_mp_mul_digs_fast.o s_mp_mul_high_digs.o s_mp_mul_high_digs_fast.o s_mp_prime_is_divisible.o \ -s_mp_rand_jenkins.o s_mp_rand_platform.o s_mp_reverse.o s_mp_sqr.o s_mp_sqr_fast.o s_mp_sub.o \ -s_mp_toom_mul.o s_mp_toom_sqr.o +mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_balance_mul.o \ +s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o \ +s_mp_invmod_fast.o s_mp_invmod_slow.o s_mp_karatsuba_mul.o s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o \ +s_mp_montgomery_reduce_fast.o s_mp_mul_digs.o s_mp_mul_digs_fast.o s_mp_mul_high_digs.o \ +s_mp_mul_high_digs_fast.o s_mp_prime_is_divisible.o s_mp_rand_jenkins.o s_mp_rand_platform.o \ +s_mp_reverse.o s_mp_sqr.o s_mp_sqr_fast.o s_mp_sub.o s_mp_toom_mul.o s_mp_toom_sqr.o #END_INS diff --git a/makefile.mingw b/makefile.mingw index c3a680f..3c3fcf7 100644 --- a/makefile.mingw +++ b/makefile.mingw @@ -47,12 +47,12 @@ mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o mp_reduce_is_2k_l. 
mp_root_u32.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o mp_set_l.o \ mp_set_ll.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_set_ull.o mp_shrink.o mp_signed_rsh.o mp_sqr.o \ mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o \ -mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_balance_mul.o s_mp_exptmod.o \ -s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod_fast.o s_mp_invmod_slow.o s_mp_karatsuba_mul.o \ -s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o s_mp_montgomery_reduce_fast.o s_mp_mul_digs.o \ -s_mp_mul_digs_fast.o s_mp_mul_high_digs.o s_mp_mul_high_digs_fast.o s_mp_prime_is_divisible.o \ -s_mp_rand_jenkins.o s_mp_rand_platform.o s_mp_reverse.o s_mp_sqr.o s_mp_sqr_fast.o s_mp_sub.o \ -s_mp_toom_mul.o s_mp_toom_sqr.o +mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_balance_mul.o \ +s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o \ +s_mp_invmod_fast.o s_mp_invmod_slow.o s_mp_karatsuba_mul.o s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o \ +s_mp_montgomery_reduce_fast.o s_mp_mul_digs.o s_mp_mul_digs_fast.o s_mp_mul_high_digs.o \ +s_mp_mul_high_digs_fast.o s_mp_prime_is_divisible.o s_mp_rand_jenkins.o s_mp_rand_platform.o \ +s_mp_reverse.o s_mp_sqr.o s_mp_sqr_fast.o s_mp_sub.o s_mp_toom_mul.o s_mp_toom_sqr.o HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/makefile.msvc b/makefile.msvc index 9a13ffe..c42ca12 100644 --- a/makefile.msvc +++ b/makefile.msvc @@ -39,12 +39,12 @@ mp_reduce_2k_setup.obj mp_reduce_2k_setup_l.obj mp_reduce_is_2k.obj mp_reduce_is mp_root_u32.obj mp_rshd.obj mp_sbin_size.obj mp_set.obj mp_set_double.obj mp_set_i32.obj mp_set_i64.obj mp_set_l.obj \ mp_set_ll.obj mp_set_u32.obj mp_set_u64.obj mp_set_ul.obj mp_set_ull.obj mp_shrink.obj mp_signed_rsh.obj mp_sqr.obj \ mp_sqrmod.obj 
mp_sqrt.obj mp_sqrtmod_prime.obj mp_sub.obj mp_sub_d.obj mp_submod.obj mp_to_radix.obj mp_to_sbin.obj \ -mp_to_ubin.obj mp_ubin_size.obj mp_unpack.obj mp_xor.obj mp_zero.obj s_mp_add.obj s_mp_balance_mul.obj s_mp_exptmod.obj \ -s_mp_exptmod_fast.obj s_mp_get_bit.obj s_mp_invmod_fast.obj s_mp_invmod_slow.obj s_mp_karatsuba_mul.obj \ -s_mp_karatsuba_sqr.obj s_mp_log.obj s_mp_log_d.obj s_mp_montgomery_reduce_fast.obj s_mp_mul_digs.obj \ -s_mp_mul_digs_fast.obj s_mp_mul_high_digs.obj s_mp_mul_high_digs_fast.obj s_mp_prime_is_divisible.obj \ -s_mp_rand_jenkins.obj s_mp_rand_platform.obj s_mp_reverse.obj s_mp_sqr.obj s_mp_sqr_fast.obj s_mp_sub.obj \ -s_mp_toom_mul.obj s_mp_toom_sqr.obj +mp_to_ubin.obj mp_ubin_size.obj mp_unpack.obj mp_xor.obj mp_zero.obj s_mp_add.obj s_mp_balance_mul.obj \ +s_mp_div_recursive.obj s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj s_mp_get_bit.obj \ +s_mp_invmod_fast.obj s_mp_invmod_slow.obj s_mp_karatsuba_mul.obj s_mp_karatsuba_sqr.obj s_mp_log.obj s_mp_log_d.obj \ +s_mp_montgomery_reduce_fast.obj s_mp_mul_digs.obj s_mp_mul_digs_fast.obj s_mp_mul_high_digs.obj \ +s_mp_mul_high_digs_fast.obj s_mp_prime_is_divisible.obj s_mp_rand_jenkins.obj s_mp_rand_platform.obj \ +s_mp_reverse.obj s_mp_sqr.obj s_mp_sqr_fast.obj s_mp_sub.obj s_mp_toom_mul.obj s_mp_toom_sqr.obj HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/makefile.shared b/makefile.shared index 336d648..2c9c0e3 100644 --- a/makefile.shared +++ b/makefile.shared @@ -41,12 +41,12 @@ mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o mp_reduce_is_2k_l. 
mp_root_u32.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o mp_set_l.o \ mp_set_ll.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_set_ull.o mp_shrink.o mp_signed_rsh.o mp_sqr.o \ mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o \ -mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_balance_mul.o s_mp_exptmod.o \ -s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod_fast.o s_mp_invmod_slow.o s_mp_karatsuba_mul.o \ -s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o s_mp_montgomery_reduce_fast.o s_mp_mul_digs.o \ -s_mp_mul_digs_fast.o s_mp_mul_high_digs.o s_mp_mul_high_digs_fast.o s_mp_prime_is_divisible.o \ -s_mp_rand_jenkins.o s_mp_rand_platform.o s_mp_reverse.o s_mp_sqr.o s_mp_sqr_fast.o s_mp_sub.o \ -s_mp_toom_mul.o s_mp_toom_sqr.o +mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_balance_mul.o \ +s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o \ +s_mp_invmod_fast.o s_mp_invmod_slow.o s_mp_karatsuba_mul.o s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o \ +s_mp_montgomery_reduce_fast.o s_mp_mul_digs.o s_mp_mul_digs_fast.o s_mp_mul_high_digs.o \ +s_mp_mul_high_digs_fast.o s_mp_prime_is_divisible.o s_mp_rand_jenkins.o s_mp_rand_platform.o \ +s_mp_reverse.o s_mp_sqr.o s_mp_sqr_fast.o s_mp_sub.o s_mp_toom_mul.o s_mp_toom_sqr.o #END_INS diff --git a/makefile.unix b/makefile.unix index 9a35dee..d9d7727 100644 --- a/makefile.unix +++ b/makefile.unix @@ -48,12 +48,12 @@ mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o mp_reduce_is_2k_l. 
mp_root_u32.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o mp_set_l.o \ mp_set_ll.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_set_ull.o mp_shrink.o mp_signed_rsh.o mp_sqr.o \ mp_sqrmod.o mp_sqrt.o mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o \ -mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_balance_mul.o s_mp_exptmod.o \ -s_mp_exptmod_fast.o s_mp_get_bit.o s_mp_invmod_fast.o s_mp_invmod_slow.o s_mp_karatsuba_mul.o \ -s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o s_mp_montgomery_reduce_fast.o s_mp_mul_digs.o \ -s_mp_mul_digs_fast.o s_mp_mul_high_digs.o s_mp_mul_high_digs_fast.o s_mp_prime_is_divisible.o \ -s_mp_rand_jenkins.o s_mp_rand_platform.o s_mp_reverse.o s_mp_sqr.o s_mp_sqr_fast.o s_mp_sub.o \ -s_mp_toom_mul.o s_mp_toom_sqr.o +mp_to_ubin.o mp_ubin_size.o mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_balance_mul.o \ +s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_get_bit.o \ +s_mp_invmod_fast.o s_mp_invmod_slow.o s_mp_karatsuba_mul.o s_mp_karatsuba_sqr.o s_mp_log.o s_mp_log_d.o \ +s_mp_montgomery_reduce_fast.o s_mp_mul_digs.o s_mp_mul_digs_fast.o s_mp_mul_high_digs.o \ +s_mp_mul_high_digs_fast.o s_mp_prime_is_divisible.o s_mp_rand_jenkins.o s_mp_rand_platform.o \ +s_mp_reverse.o s_mp_sqr.o s_mp_sqr_fast.o s_mp_sub.o s_mp_toom_mul.o s_mp_toom_sqr.o HEADERS_PUB=tommath.h HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB) diff --git a/mp_div.c b/mp_div.c index 1c87005..5fd9572 100644 --- a/mp_div.c +++ b/mp_div.c @@ -3,13 +3,8 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ -#ifdef MP_DIV_SMALL - -/* slower bit-bang division... also smaller */ mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) { - mp_int ta, tb, tq, q; - int n, n2; mp_err err; /* is divisor zero ? 
*/ @@ -17,7 +12,7 @@ mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) return MP_VAL; } - /* if a < b then q=0, r = a */ + /* if a < b then q = 0, r = a */ if (mp_cmp_mag(a, b) == MP_LT) { if (d != NULL) { err = mp_copy(a, d); @@ -30,221 +25,17 @@ mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) return err; } - /* init our temps */ - if ((err = mp_init_multi(&ta, &tb, &tq, &q, NULL)) != MP_OKAY) { - return err; - } - - - mp_set(&tq, 1uL); - n = mp_count_bits(a) - mp_count_bits(b); - if ((err = mp_abs(a, &ta)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_abs(b, &tb)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_mul_2d(&tq, n, &tq)) != MP_OKAY) goto LBL_ERR; - - while (n-- >= 0) { - if (mp_cmp(&tb, &ta) != MP_GT) { - if ((err = mp_sub(&ta, &tb, &ta)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_add(&q, &tq, &q)) != MP_OKAY) goto LBL_ERR; - } - if ((err = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY) goto LBL_ERR; - } - - /* now q == quotient and ta == remainder */ - n = a->sign; - n2 = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; - if (c != NULL) { - mp_exch(c, &q); - c->sign = MP_IS_ZERO(c) ? MP_ZPOS : n2; - } - if (d != NULL) { - mp_exch(d, &ta); - d->sign = MP_IS_ZERO(d) ? MP_ZPOS : n; - } -LBL_ERR: - mp_clear_multi(&ta, &tb, &tq, &q, NULL); - return err; -} - -#else - -/* integer signed division. - * c*b + d == a [e.g. a/b, c=quotient, d=remainder] - * HAC pp.598 Algorithm 14.20 - * - * Note that the description in HAC is horribly - * incomplete. For example, it doesn't consider - * the case where digits are removed from 'x' in - * the inner loop. It also doesn't consider the - * case that y has fewer than three digits, etc.. - * - * The overall algorithm is as described as - * 14.20 from HAC but fixed to treat these cases. 
-*/ -mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) -{ - mp_int q, x, y, t1, t2; - int n, t, i, norm; - mp_sign neg; - mp_err err; - - /* is divisor zero ? */ - if (MP_IS_ZERO(b)) { - return MP_VAL; - } - - /* if a < b then q=0, r = a */ - if (mp_cmp_mag(a, b) == MP_LT) { - if (d != NULL) { - err = mp_copy(a, d); - } else { - err = MP_OKAY; - } - if (c != NULL) { - mp_zero(c); - } - return err; - } - - if ((err = mp_init_size(&q, a->used + 2)) != MP_OKAY) { - return err; - } - q.used = a->used + 2; - - if ((err = mp_init(&t1)) != MP_OKAY) goto LBL_Q; - - if ((err = mp_init(&t2)) != MP_OKAY) goto LBL_T1; - - if ((err = mp_init_copy(&x, a)) != MP_OKAY) goto LBL_T2; - - if ((err = mp_init_copy(&y, b)) != MP_OKAY) goto LBL_X; - - /* fix the sign */ - neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; - x.sign = y.sign = MP_ZPOS; - - /* normalize both x and y, ensure that y >= b/2, [b == 2**MP_DIGIT_BIT] */ - norm = mp_count_bits(&y) % MP_DIGIT_BIT; - if (norm < (MP_DIGIT_BIT - 1)) { - norm = (MP_DIGIT_BIT - 1) - norm; - if ((err = mp_mul_2d(&x, norm, &x)) != MP_OKAY) goto LBL_Y; - if ((err = mp_mul_2d(&y, norm, &y)) != MP_OKAY) goto LBL_Y; + if (MP_HAS(S_MP_DIV_RECURSIVE) + && (b->used > MP_KARATSUBA_MUL_CUTOFF) + && (b->used <= ((a->used)/3*2))) { + err = s_mp_div_recursive(a, b, c, d); + } else if (MP_HAS(S_MP_DIV_SCHOOL)) { + err = s_mp_div_school(a, b, c, d); } else { - norm = 0; + err = s_mp_div_small(a, b, c, d); } - /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ - n = x.used - 1; - t = y.used - 1; - - /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ - /* y = y*b**{n-t} */ - if ((err = mp_lshd(&y, n - t)) != MP_OKAY) goto LBL_Y; - - while (mp_cmp(&x, &y) != MP_LT) { - ++(q.dp[n - t]); - if ((err = mp_sub(&x, &y, &x)) != MP_OKAY) goto LBL_Y; - } - - /* reset y by shifting it back down */ - mp_rshd(&y, n - t); - - /* step 3. 
for i from n down to (t + 1) */ - for (i = n; i >= (t + 1); i--) { - if (i > x.used) { - continue; - } - - /* step 3.1 if xi == yt then set q{i-t-1} to b-1, - * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ - if (x.dp[i] == y.dp[t]) { - q.dp[(i - t) - 1] = ((mp_digit)1 << (mp_digit)MP_DIGIT_BIT) - (mp_digit)1; - } else { - mp_word tmp; - tmp = (mp_word)x.dp[i] << (mp_word)MP_DIGIT_BIT; - tmp |= (mp_word)x.dp[i - 1]; - tmp /= (mp_word)y.dp[t]; - if (tmp > (mp_word)MP_MASK) { - tmp = MP_MASK; - } - q.dp[(i - t) - 1] = (mp_digit)(tmp & (mp_word)MP_MASK); - } - - /* while (q{i-t-1} * (yt * b + y{t-1})) > - xi * b**2 + xi-1 * b + xi-2 - - do q{i-t-1} -= 1; - */ - q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] + 1uL) & (mp_digit)MP_MASK; - do { - q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] - 1uL) & (mp_digit)MP_MASK; - - /* find left hand */ - mp_zero(&t1); - t1.dp[0] = ((t - 1) < 0) ? 0u : y.dp[t - 1]; - t1.dp[1] = y.dp[t]; - t1.used = 2; - if ((err = mp_mul_d(&t1, q.dp[(i - t) - 1], &t1)) != MP_OKAY) goto LBL_Y; - - /* find right hand */ - t2.dp[0] = ((i - 2) < 0) ? 0u : x.dp[i - 2]; - t2.dp[1] = x.dp[i - 1]; /* i >= 1 always holds */ - t2.dp[2] = x.dp[i]; - t2.used = 3; - } while (mp_cmp_mag(&t1, &t2) == MP_GT); - - /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ - if ((err = mp_mul_d(&y, q.dp[(i - t) - 1], &t1)) != MP_OKAY) goto LBL_Y; - - if ((err = mp_lshd(&t1, (i - t) - 1)) != MP_OKAY) goto LBL_Y; - - if ((err = mp_sub(&x, &t1, &x)) != MP_OKAY) goto LBL_Y; - - /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ - if (x.sign == MP_NEG) { - if ((err = mp_copy(&y, &t1)) != MP_OKAY) goto LBL_Y; - if ((err = mp_lshd(&t1, (i - t) - 1)) != MP_OKAY) goto LBL_Y; - if ((err = mp_add(&x, &t1, &x)) != MP_OKAY) goto LBL_Y; - - q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] - 1uL) & MP_MASK; - } - } - - /* now q is the quotient and x is the remainder - * [which we have to normalize] - */ - - /* get sign before writing to c */ - x.sign = (x.used == 0) ? 
MP_ZPOS : a->sign; - - if (c != NULL) { - mp_clamp(&q); - mp_exch(&q, c); - c->sign = neg; - } - - if (d != NULL) { - if ((err = mp_div_2d(&x, norm, &x, NULL)) != MP_OKAY) goto LBL_Y; - mp_exch(&x, d); - } - - err = MP_OKAY; - -LBL_Y: - mp_clear(&y); -LBL_X: - mp_clear(&x); -LBL_T2: - mp_clear(&t2); -LBL_T1: - mp_clear(&t1); -LBL_Q: - mp_clear(&q); return err; } - #endif -#endif diff --git a/s_mp_div_recursive.c b/s_mp_div_recursive.c new file mode 100644 index 0000000..67b4a05 --- /dev/null +++ b/s_mp_div_recursive.c @@ -0,0 +1,182 @@ +#include "tommath_private.h" +#ifdef S_MP_DIV_RECURSIVE_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +/* + Direct implementation of algorithms 1.8 "RecursiveDivRem" and 1.9 "UnbalancedDivision" + from: + + Brent, Richard P., and Paul Zimmermann. "Modern computer arithmetic" + Vol. 18. Cambridge University Press, 2010 + Available online at https://arxiv.org/pdf/1004.4710 + + pages 19ff. in the above online document. 
+*/ + +static mp_err s_mp_recursion(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r) +{ + mp_err err; + int m, k; + mp_int A1, A2, B1, B0, Q1, Q0, R1, R0, t; + + m = a->used - b->used; + if (m < MP_KARATSUBA_MUL_CUTOFF) { + return s_mp_div_school(a, b, q, r); + } + + if ((err = mp_init_multi(&A1, &A2, &B1, &B0, &Q1, &Q0, &R1, &R0, &t, NULL)) != MP_OKAY) { + goto LBL_ERR; + } + + /* k = floor(m/2) */ + k = m/2; + + /* B1 = b / beta^k, B0 = b % beta^k*/ + if ((err = mp_div_2d(b, k * MP_DIGIT_BIT, &B1, &B0)) != MP_OKAY) goto LBL_ERR; + + /* (Q1, R1) = RecursiveDivRem(A / beta^(2k), B1) */ + if ((err = mp_div_2d(a, 2*k * MP_DIGIT_BIT, &A1, &t)) != MP_OKAY) goto LBL_ERR; + if ((err = s_mp_recursion(&A1, &B1, &Q1, &R1)) != MP_OKAY) goto LBL_ERR; + + /* A1 = (R1 * beta^(2k)) + (A % beta^(2k)) - (Q1 * B0 * beta^k) */ + if ((err = mp_lshd(&R1, 2*k)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add(&R1, &t, &A1)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_mul(&Q1, &B0, &t)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_lshd(&t, k)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_sub(&A1, &t, &A1)) != MP_OKAY) goto LBL_ERR; + + /* while A1 < 0 do Q1 = Q1 - 1, A1 = A1 + (beta^k * B) */ + if ((err = mp_mul_2d(b, k * MP_DIGIT_BIT, &t)) != MP_OKAY) goto LBL_ERR; + while (mp_cmp_d(&A1, 0) == MP_LT) { + if ((err = mp_decr(&Q1)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add(&A1, &t, &A1)) != MP_OKAY) goto LBL_ERR; + } + + /* (Q0, R0) = RecursiveDivRem(A1 / beta^(k), B1) */ + if ((err = mp_div_2d(&A1, k * MP_DIGIT_BIT, &A1, &t)) != MP_OKAY) goto LBL_ERR; + if ((err = s_mp_recursion(&A1, &B1, &Q0, &R0)) != MP_OKAY) goto LBL_ERR; + + /* A2 = (R0*beta^k) + (A1 % beta^k) - (Q0*B0) */ + if ((err = mp_lshd(&R0, k)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add(&R0, &t, &A2)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_mul(&Q0, &B0, &t)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_sub(&A2, &t, &A2)) != MP_OKAY) goto LBL_ERR; + + /* while A2 < 0 do Q0 = Q0 - 1, A2 = A2 + B */ + while (mp_cmp_d(&A2, 0) 
== MP_LT) { + if ((err = mp_decr(&Q0)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add(&A2, b, &A2)) != MP_OKAY) goto LBL_ERR; + } + /* return q = (Q1*beta^k) + Q0, r = A2 */ + if (q != NULL) { + if ((err = mp_lshd(&Q1, k)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add(&Q1, &Q0, q)) != MP_OKAY) goto LBL_ERR; + } + if (r != NULL) { + if ((err = mp_copy(&A2, r)) != MP_OKAY) goto LBL_ERR; + } + +LBL_ERR: + mp_clear_multi(&A1, &A2, &B1, &B0, &Q1, &Q0, &R1, &R0, &t, NULL); + return err; +} + + +mp_err s_mp_div_recursive(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r) +{ + int j, m, n, sigma; + mp_err err; + mp_sign neg; + mp_digit msb_b, msb; + mp_int A, B, Q, Q1, R, A_div, A_mod; + + if ((err = mp_init_multi(&A, &B, &Q, &Q1, &R, &A_div, &A_mod, NULL)) != MP_OKAY) { + goto LBL_ERR; + } + + /* most significant bit of a limb */ + /* assumes MP_DIGIT_MAX < (sizeof(mp_digit) * CHAR_BIT) */ + msb = (MP_DIGIT_MAX + (mp_digit)(1)) >> 1; + + /* + Method to compute sigma shamelessly stolen from + + J. Burnikel and J. Ziegler, "Fast recursive division", Research Report + MPI-I-98-1-022, Max-Planck-Institut fuer Informatik, Saarbruecken, Germany, + October 1998. (available online) + + Vid. section 2.3. + */ + m = MP_KARATSUBA_MUL_CUTOFF; + while (m <= b->used) { + m <<= 1; + } + j = (b->used + m - 1) / m; + n = j * m; + + sigma = MP_DIGIT_BIT * (n - b->used); + msb_b = b->dp[b->used - 1]; + while (msb_b < msb) { + sigma++; + msb_b <<= 1; + } + /* Use that sigma to normalize B */ + if ((err = mp_mul_2d(b, sigma, &B)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = mp_mul_2d(a, sigma, &A)) != MP_OKAY) { + goto LBL_ERR; + } + + /* fix the sign */ + neg = (a->sign == b->sign) ? 
MP_ZPOS : MP_NEG; + A.sign = B.sign = MP_ZPOS; + + /* + If the magnitude of "A" is not more than twice that of "B" we can work + on them directly, otherwise we need to work on "A" in chunks + */ + n = B.used; + m = A.used - B.used; + + /* Q = 0 */ + mp_zero(&Q); + while (m > n) { + /* (q, r) = RecursiveDivRem(A / (beta^(m-n)), B) */ + j = (m - n) * MP_DIGIT_BIT; + if ((err = mp_div_2d(&A, j, &A_div, &A_mod)) != MP_OKAY) goto LBL_ERR; + if ((err = s_mp_recursion(&A_div, &B, &Q1, &R)) != MP_OKAY) goto LBL_ERR; + /* Q = (Q * beta^n) + q */ + if ((err = mp_mul_2d(&Q, n * MP_DIGIT_BIT, &Q)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add(&Q, &Q1, &Q)) != MP_OKAY) goto LBL_ERR; + /* A = (r * beta^(m-n)) + (A % beta^(m-n))*/ + if ((err = mp_mul_2d(&R, (m - n) * MP_DIGIT_BIT, &R)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add(&R, &A_mod, &A)) != MP_OKAY) goto LBL_ERR; + /* m = m - n */ + m = m - n; + } + /* (q, r) = RecursiveDivRem(A, B) */ + if ((err = s_mp_recursion(&A, &B, &Q1, &R)) != MP_OKAY) goto LBL_ERR; + /* Q = (Q * beta^m) + q, R = r */ + if ((err = mp_mul_2d(&Q, m * MP_DIGIT_BIT, &Q)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add(&Q, &Q1, &Q)) != MP_OKAY) goto LBL_ERR; + + /* get sign before writing to q */ + Q.sign = (Q.used == 0) ? MP_ZPOS : a->sign; + + if (q != NULL) { + mp_exch(&Q, q); + q->sign = neg; + } + if (r != NULL) { + /* de-normalize the remainder */ + if ((err = mp_div_2d(&R, sigma, &R, NULL)) != MP_OKAY) goto LBL_ERR; + mp_exch(&R, r); + } +LBL_ERR: + mp_clear_multi(&A, &B, &Q, &Q1, &R, &A_div, &A_mod, NULL); + return err; +} + +#endif diff --git a/s_mp_div_school.c b/s_mp_div_school.c new file mode 100644 index 0000000..6ff427a --- /dev/null +++ b/s_mp_div_school.c @@ -0,0 +1,158 @@ +#include "tommath_private.h" +#ifdef S_MP_DIV_SCHOOL_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +/* integer signed division. + * c*b + d == a [e.g. 
a/b, c=quotient, d=remainder] + * HAC pp.598 Algorithm 14.20 + * + * Note that the description in HAC is horribly + * incomplete. For example, it doesn't consider + * the case where digits are removed from 'x' in + * the inner loop. It also doesn't consider the + * case that y has fewer than three digits, etc.. + * + * The overall algorithm is as described as + * 14.20 from HAC but fixed to treat these cases. +*/ +mp_err s_mp_div_school(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) +{ + mp_int q, x, y, t1, t2; + int n, t, i, norm; + mp_sign neg; + mp_err err; + + if ((err = mp_init_size(&q, a->used + 2)) != MP_OKAY) { + return err; + } + q.used = a->used + 2; + + if ((err = mp_init(&t1)) != MP_OKAY) goto LBL_Q; + if ((err = mp_init(&t2)) != MP_OKAY) goto LBL_T1; + if ((err = mp_init_copy(&x, a)) != MP_OKAY) goto LBL_T2; + if ((err = mp_init_copy(&y, b)) != MP_OKAY) goto LBL_X; + + /* fix the sign */ + neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; + x.sign = y.sign = MP_ZPOS; + + /* normalize both x and y, ensure that y >= b/2, [b == 2**MP_DIGIT_BIT] */ + norm = mp_count_bits(&y) % MP_DIGIT_BIT; + if (norm < (MP_DIGIT_BIT - 1)) { + norm = (MP_DIGIT_BIT - 1) - norm; + if ((err = mp_mul_2d(&x, norm, &x)) != MP_OKAY) goto LBL_Y; + if ((err = mp_mul_2d(&y, norm, &y)) != MP_OKAY) goto LBL_Y; + } else { + norm = 0; + } + + /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ + n = x.used - 1; + t = y.used - 1; + + /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ + /* y = y*b**{n-t} */ + if ((err = mp_lshd(&y, n - t)) != MP_OKAY) goto LBL_Y; + + while (mp_cmp(&x, &y) != MP_LT) { + ++(q.dp[n - t]); + if ((err = mp_sub(&x, &y, &x)) != MP_OKAY) goto LBL_Y; + } + + /* reset y by shifting it back down */ + mp_rshd(&y, n - t); + + /* step 3. 
for i from n down to (t + 1) */ + for (i = n; i >= (t + 1); i--) { + if (i > x.used) { + continue; + } + + /* step 3.1 if xi == yt then set q{i-t-1} to b-1, + * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ + if (x.dp[i] == y.dp[t]) { + q.dp[(i - t) - 1] = ((mp_digit)1 << (mp_digit)MP_DIGIT_BIT) - (mp_digit)1; + } else { + mp_word tmp; + tmp = (mp_word)x.dp[i] << (mp_word)MP_DIGIT_BIT; + tmp |= (mp_word)x.dp[i - 1]; + tmp /= (mp_word)y.dp[t]; + if (tmp > (mp_word)MP_MASK) { + tmp = MP_MASK; + } + q.dp[(i - t) - 1] = (mp_digit)(tmp & (mp_word)MP_MASK); + } + + /* while (q{i-t-1} * (yt * b + y{t-1})) > + xi * b**2 + xi-1 * b + xi-2 + + do q{i-t-1} -= 1; + */ + q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] + 1uL) & (mp_digit)MP_MASK; + do { + q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] - 1uL) & (mp_digit)MP_MASK; + + /* find left hand */ + mp_zero(&t1); + t1.dp[0] = ((t - 1) < 0) ? 0u : y.dp[t - 1]; + t1.dp[1] = y.dp[t]; + t1.used = 2; + if ((err = mp_mul_d(&t1, q.dp[(i - t) - 1], &t1)) != MP_OKAY) goto LBL_Y; + + /* find right hand */ + t2.dp[0] = ((i - 2) < 0) ? 0u : x.dp[i - 2]; + t2.dp[1] = x.dp[i - 1]; /* i >= 1 always holds */ + t2.dp[2] = x.dp[i]; + t2.used = 3; + } while (mp_cmp_mag(&t1, &t2) == MP_GT); + + /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ + if ((err = mp_mul_d(&y, q.dp[(i - t) - 1], &t1)) != MP_OKAY) goto LBL_Y; + if ((err = mp_lshd(&t1, (i - t) - 1)) != MP_OKAY) goto LBL_Y; + if ((err = mp_sub(&x, &t1, &x)) != MP_OKAY) goto LBL_Y; + + /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ + if (x.sign == MP_NEG) { + if ((err = mp_copy(&y, &t1)) != MP_OKAY) goto LBL_Y; + if ((err = mp_lshd(&t1, (i - t) - 1)) != MP_OKAY) goto LBL_Y; + if ((err = mp_add(&x, &t1, &x)) != MP_OKAY) goto LBL_Y; + + q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] - 1uL) & MP_MASK; + } + } + + /* now q is the quotient and x is the remainder + * [which we have to normalize] + */ + + /* get sign before writing to c */ + x.sign = (x.used == 0) ? 
MP_ZPOS : a->sign; + + if (c != NULL) { + mp_clamp(&q); + mp_exch(&q, c); + c->sign = neg; + } + + if (d != NULL) { + if ((err = mp_div_2d(&x, norm, &x, NULL)) != MP_OKAY) goto LBL_Y; + mp_exch(&x, d); + } + + err = MP_OKAY; + +LBL_Y: + mp_clear(&y); +LBL_X: + mp_clear(&x); +LBL_T2: + mp_clear(&t2); +LBL_T1: + mp_clear(&t1); +LBL_Q: + mp_clear(&q); + return err; +} + +#endif diff --git a/s_mp_div_small.c b/s_mp_div_small.c new file mode 100644 index 0000000..56b1335 --- /dev/null +++ b/s_mp_div_small.c @@ -0,0 +1,51 @@ +#include "tommath_private.h" +#ifdef S_MP_DIV_SMALL_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ + +/* slower bit-bang division... also smaller */ +mp_err s_mp_div_small(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) +{ + mp_int ta, tb, tq, q; + int n; + mp_sign sign; + mp_err err; + + /* init our temps */ + if ((err = mp_init_multi(&ta, &tb, &tq, &q, NULL)) != MP_OKAY) { + return err; + } + + mp_set(&tq, 1uL); + n = mp_count_bits(a) - mp_count_bits(b); + if ((err = mp_abs(a, &ta)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_abs(b, &tb)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_mul_2d(&tq, n, &tq)) != MP_OKAY) goto LBL_ERR; + + while (n-- >= 0) { + if (mp_cmp(&tb, &ta) != MP_GT) { + if ((err = mp_sub(&ta, &tb, &ta)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add(&q, &tq, &q)) != MP_OKAY) goto LBL_ERR; + } + if ((err = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY) goto LBL_ERR; + } + + /* now q == quotient and ta == remainder */ + + sign = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; + if (c != NULL) { + mp_exch(c, &q); + c->sign = MP_IS_ZERO(c) ? MP_ZPOS : sign; + } + if (d != NULL) { + mp_exch(d, &ta); + d->sign = MP_IS_ZERO(d) ? 
MP_ZPOS : a->sign; + } +LBL_ERR: + mp_clear_multi(&ta, &tb, &tq, &q, NULL); + return err; +} + +#endif diff --git a/tommath_class.h b/tommath_class.h index f7812da..5373d91 100644 --- a/tommath_class.h +++ b/tommath_class.h @@ -146,6 +146,9 @@ # define MP_ZERO_C # define S_MP_ADD_C # define S_MP_BALANCE_MUL_C +# define S_MP_DIV_RECURSIVE_C +# define S_MP_DIV_SCHOOL_C +# define S_MP_DIV_SMALL_C # define S_MP_EXPTMOD_C # define S_MP_EXPTMOD_FAST_C # define S_MP_GET_BIT_C @@ -250,24 +253,12 @@ #endif #if defined(MP_DIV_C) -# define MP_ADD_C -# define MP_CLAMP_C -# define MP_CLEAR_C -# define MP_CMP_C # define MP_CMP_MAG_C # define MP_COPY_C -# define MP_COUNT_BITS_C -# define MP_DIV_2D_C -# define MP_EXCH_C -# define MP_INIT_C -# define MP_INIT_COPY_C -# define MP_INIT_SIZE_C -# define MP_LSHD_C -# define MP_MUL_2D_C -# define MP_MUL_D_C -# define MP_RSHD_C -# define MP_SUB_C # define MP_ZERO_C +# define S_MP_DIV_RECURSIVE_C +# define S_MP_DIV_SCHOOL_C +# define S_MP_DIV_SMALL_C #endif #if defined(MP_DIV_2_C) @@ -1022,6 +1013,59 @@ # define MP_MUL_C #endif +#if defined(S_MP_DIV_RECURSIVE_C) +# define MP_ADD_C +# define MP_CLEAR_MULTI_C +# define MP_CMP_D_C +# define MP_COPY_C +# define MP_DECR_C +# define MP_DIV_2D_C +# define MP_EXCH_C +# define MP_INIT_MULTI_C +# define MP_LSHD_C +# define MP_MUL_2D_C +# define MP_MUL_C +# define MP_SUB_C +# define MP_ZERO_C +# define S_MP_DIV_SCHOOL_C +# define S_MP_RECURSION_C +#endif + +#if defined(S_MP_DIV_SCHOOL_C) +# define MP_ADD_C +# define MP_CLAMP_C +# define MP_CLEAR_C +# define MP_CMP_C +# define MP_CMP_MAG_C +# define MP_COPY_C +# define MP_COUNT_BITS_C +# define MP_DIV_2D_C +# define MP_EXCH_C +# define MP_INIT_C +# define MP_INIT_COPY_C +# define MP_INIT_SIZE_C +# define MP_LSHD_C +# define MP_MUL_2D_C +# define MP_MUL_D_C +# define MP_RSHD_C +# define MP_SUB_C +# define MP_ZERO_C +#endif + +#if defined(S_MP_DIV_SMALL_C) +# define MP_ABS_C +# define MP_ADD_C +# define MP_CLEAR_MULTI_C +# define MP_CMP_C +# define 
MP_COUNT_BITS_C +# define MP_DIV_2D_C +# define MP_EXCH_C +# define MP_INIT_MULTI_C +# define MP_MUL_2D_C +# define MP_SET_C +# define MP_SUB_C +#endif + #if defined(S_MP_EXPTMOD_C) # define MP_CLEAR_C # define MP_COPY_C diff --git a/tommath_private.h b/tommath_private.h index 8fcc991..e37def4 100644 --- a/tommath_private.h +++ b/tommath_private.h @@ -213,6 +213,11 @@ MP_PRIVATE mp_err s_mp_prime_is_divisible(const mp_int *a, mp_bool *result); MP_PRIVATE mp_digit s_mp_log_d(mp_digit base, mp_digit n); MP_PRIVATE mp_err s_mp_log(const mp_int *a, uint32_t base, uint32_t *c); + +MP_PRIVATE mp_err s_mp_div_recursive(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r); +MP_PRIVATE mp_err s_mp_div_school(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d); +MP_PRIVATE mp_err s_mp_div_small(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d); + /* TODO: jenkins prng is not thread safe as of now */ MP_PRIVATE mp_err s_mp_rand_jenkins(void *p, size_t n) MP_WUR; MP_PRIVATE void s_mp_rand_jenkins_init(uint64_t seed); diff --git a/tommath_superclass.h b/tommath_superclass.h index 7179975..6961a59 100644 --- a/tommath_superclass.h +++ b/tommath_superclass.h @@ -68,7 +68,7 @@ # define S_MP_REVERSE_C /* other modifiers */ -# define MP_DIV_SMALL /* Slower division, not critical */ + /* here we are on the last pass so we turn things off. The functions classes are still there