added multiplication balancing for the Toom-Cook algorithms

This commit is contained in:
czurnieden 2019-04-06 18:55:43 +02:00
parent 7f42ce0968
commit 27ca25cb82
12 changed files with 1990 additions and 284 deletions

79
bn_mp_balance_mul.c Normal file
View File

@ -0,0 +1,79 @@
#include "tommath_private.h"
#ifdef BN_MP_BALANCE_MUL_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis */
/* SPDX-License-Identifier: Unlicense */
/*
 * Balanced multiplication for operands of very different size.
 *
 * The larger operand is cut into slices that are as long as the smaller
 * operand (plus one shorter left-over slice, if any). Every slice is
 * multiplied by the smaller operand with mp_mul(), shifted to its digit
 * position and added to the running sum.
 *
 * a, b  the two factors
 * c     receives a * b; it is only written once the whole product has been
 *       computed, so it is left unchanged when an error is returned
 *
 * Returns MP_OKAY on success or the error code of the first failing call.
 */
int mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c)
{
   int e, neg, count, len, i, j, bsize;
   mp_int a0, tmp, A, B, r;

   /* sign of the product, taken before c (which may be a or b) is touched */
   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;

   /* slice length == length of the smaller operand */
   bsize = MIN(a->used, b->used);

   if ((e = mp_init_size(&a0, bsize + 2)) != MP_OKAY) {
      return e;
   }
   if ((e = mp_init_multi(&tmp, &r, NULL)) != MP_OKAY) {
      mp_clear(&a0);
      return e;
   }

   /*
    * One factor is zero: there is nothing to slice (and slicing would divide
    * by / step with a block size of zero). r is still the freshly initialized
    * zero, hand it out as the result.
    */
   if (bsize == 0) {
      mp_exch(&r, c);
      goto LBL_ERR;
   }

   /* Make sure that A is the larger one (shallow, read-only copies) */
   if (a->used < b->used) {
      B = *a;
      A = *b;
   } else {
      A = *a;
      B = *b;
   }

   /* i counts the slices, j walks over the digits of A */
   for (i = 0, j = 0; j < A.used; i++) {
      /* Cut a slice off of A; the last one may be shorter than bsize */
      len = MIN(bsize, A.used - j);
      for (count = 0; count < len; count++) {
         a0.dp[count] = A.dp[j++];
      }
      /* keep the digits above the slice zero */
      for (; count < bsize; count++) {
         a0.dp[count] = 0;
      }
      a0.used = len;
      /* a slice may have leading zero digits or be zero altogether */
      mp_clamp(&a0);

      /* Multiply with B */
      if ((e = mp_mul(&a0, &B, &tmp)) != MP_OKAY) {
         goto LBL_ERR;
      }
      /* Shift tmp to the correct position */
      if ((e = mp_lshd(&tmp, bsize * i)) != MP_OKAY) {
         goto LBL_ERR;
      }
      /* Add to output */
      if ((e = mp_add(&r, &tmp, &r)) != MP_OKAY) {
         goto LBL_ERR;
      }
   }

   mp_exch(&r, c);
   /* the slices carried no sign of A, so set the sign of the product here */
   c->sign = (c->used > 0) ? neg : MP_ZPOS;

LBL_ERR:
   mp_clear_multi(&a0, &tmp, &r, NULL);
   return e;
}
#endif

View File

@ -7,7 +7,41 @@
int mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
{
int res, neg;
#ifdef BN_MP_BALANCE_MUL_C
int len_b, len_a;
#endif
neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
#ifdef BN_MP_BALANCE_MUL_C
len_a = a->used;
len_b = b->used;
if (len_a == len_b) {
goto GO_ON;
}
/*
* Check sizes. The smaller one needs to be larger than the Karatsuba cut-off.
* The bigger one needs to be at least about one KARATSUBA_MUL_CUTOFF bigger
* to make some sense, but it depends on architecture, OS, position of the
* stars... so YMMV.
* Using it to cut the input into slices small enough for fast_s_mp_mul_digs
* was actually slower on the author's machine, but YMMV.
*/
if ((MIN(len_a, len_b) < KARATSUBA_MUL_CUTOFF)
|| ((MAX(len_a, len_b)) / 2 < KARATSUBA_MUL_CUTOFF)) {
goto GO_ON;
}
/*
* Not much effect was observed below a ratio of 1:2, but again: YMMV.
*/
if ((MAX(len_a, len_b) / MIN(len_a, len_b)) < 2) {
goto GO_ON;
}
res = mp_balance_mul(a,b,c);
goto END;
GO_ON:
#endif
/* use Toom-Cook? */
#ifdef BN_MP_TOOM_MUL_C
@ -45,7 +79,9 @@ int mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
#endif
}
}
END:
c->sign = (c->used > 0) ? neg : MP_ZPOS;
return res;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1602,7 +1602,46 @@ LTM_ERR:
return EXIT_FAILURE;
}
/*
 * Multiplies a long number by a much shorter one with mp_mul() and compares
 * the product against a precomputed value. All three numbers are given as
 * radix-64 strings.
 *
 * Returns EXIT_SUCCESS if the product matches, EXIT_FAILURE on any error or
 * mismatch.
 */
static int test_mp_balance_mul(void)
{
   mp_int a, b, c;
   int e = MP_OKAY;

   const char *na =
      "4b0I5uMTujCysw+1OOuOyH2FX2WymrHUqi8BBDb7XpkV/4i7vXTbEYUy/kdIfCKu5jT5JEqYkdmnn3jAYo8XShPzNLxZx9yoLjxYRyptSuOI2B1DspvbIVYXY12sxPZ4/HCJ4Usm2MU5lO/006KnDMxuxiv1rm6YZJZ0eZU";
   const char *nb = "3x9vs0yVi4hIq7poAeVcggC3WoRt0zRLKO";
   const char *nc =
      "HzrSq9WVt1jDTVlwUxSKqxctu2GVD+N8+SVGaPFRqdxyld6IxDBbj27BPJzYUdR96k3sWpkO8XnDBvupGPnehpQe4KlO/KmN1PjFov/UTZYM+LYzkFcBPyV6hkkL8ePC1rlFLAHzgJMBCXVp4mRqtkQrDsZXXlcqlbTFu69wF6zDEysiX2cAtn/kP9ldblJiwYPCD8hG";

   /*
    * If the initialization fails there is nothing valid to clear, so do not
    * go through LTM_ERR, which would run mp_clear_multi() on a, b and c.
    */
   if ((e = mp_init_multi(&a, &b, &c, NULL)) != MP_OKAY) {
      return EXIT_FAILURE;
   }

   if ((e = mp_read_radix(&a, na, 64)) != MP_OKAY) {
      goto LTM_ERR;
   }
   if ((e = mp_read_radix(&b, nb, 64)) != MP_OKAY) {
      goto LTM_ERR;
   }

   if ((e = mp_mul(&a, &b, &c)) != MP_OKAY) {
      goto LTM_ERR;
   }

   /* b is reused to hold the expected product */
   if ((e = mp_read_radix(&b, nc, 64)) != MP_OKAY) {
      goto LTM_ERR;
   }

   if (mp_cmp(&b, &c) != MP_EQ) {
      goto LTM_ERR;
   }

   mp_clear_multi(&a, &b, &c, NULL);
   return EXIT_SUCCESS;
LTM_ERR:
   mp_clear_multi(&a, &b, &c, NULL);
   return EXIT_FAILURE;
}
int unit_tests(void)
{
@ -1638,7 +1677,8 @@ int unit_tests(void)
T(mp_tc_or),
T(mp_tc_xor),
T(mp_incr),
T(mp_decr)
T(mp_decr),
T(mp_balance_mul)
#undef T
};
unsigned long i;

View File

@ -360,6 +360,10 @@
RelativePath="bn_mp_and.c"
>
</File>
<File
RelativePath="bn_mp_balance_mul.c"
>
</File>
<File
RelativePath="bn_mp_clamp.c"
>

View File

@ -28,9 +28,9 @@ LCOV_ARGS=--directory .
#START_INS
OBJECTS=bn_error.o bn_fast_mp_invmod.o bn_fast_mp_montgomery_reduce.o bn_fast_s_mp_mul_digs.o \
bn_fast_s_mp_mul_high_digs.o bn_fast_s_mp_sqr.o bn_mp_2expt.o bn_mp_abs.o bn_mp_add.o bn_mp_add_d.o \
bn_mp_addmod.o bn_mp_and.o bn_mp_clamp.o bn_mp_clear.o bn_mp_clear_multi.o bn_mp_cmp.o bn_mp_cmp_d.o \
bn_mp_cmp_mag.o bn_mp_cnt_lsb.o bn_mp_complement.o bn_mp_copy.o bn_mp_count_bits.o bn_mp_decr.o \
bn_mp_div.o bn_mp_div_2.o bn_mp_div_2d.o bn_mp_div_3.o bn_mp_div_d.o bn_mp_dr_is_modulus.o \
bn_mp_addmod.o bn_mp_and.o bn_mp_balance_mul.o bn_mp_clamp.o bn_mp_clear.o bn_mp_clear_multi.o bn_mp_cmp.o \
bn_mp_cmp_d.o bn_mp_cmp_mag.o bn_mp_cnt_lsb.o bn_mp_complement.o bn_mp_copy.o bn_mp_count_bits.o \
bn_mp_decr.o bn_mp_div.o bn_mp_div_2.o bn_mp_div_2d.o bn_mp_div_3.o bn_mp_div_d.o bn_mp_dr_is_modulus.o \
bn_mp_dr_reduce.o bn_mp_dr_setup.o bn_mp_exch.o bn_mp_export.o bn_mp_expt_d.o bn_mp_expt_d_ex.o \
bn_mp_exptmod.o bn_mp_exptmod_fast.o bn_mp_exteuclid.o bn_mp_fread.o bn_mp_fwrite.o bn_mp_gcd.o \
bn_mp_get_bit.o bn_mp_get_double.o bn_mp_get_int.o bn_mp_get_long.o bn_mp_get_long_long.o bn_mp_grow.o \

View File

@ -31,9 +31,9 @@ LIBMAIN_D =libtommath.dll
#List of objects to compile (all goes to libtommath.a)
OBJECTS=bn_error.o bn_fast_mp_invmod.o bn_fast_mp_montgomery_reduce.o bn_fast_s_mp_mul_digs.o \
bn_fast_s_mp_mul_high_digs.o bn_fast_s_mp_sqr.o bn_mp_2expt.o bn_mp_abs.o bn_mp_add.o bn_mp_add_d.o \
bn_mp_addmod.o bn_mp_and.o bn_mp_clamp.o bn_mp_clear.o bn_mp_clear_multi.o bn_mp_cmp.o bn_mp_cmp_d.o \
bn_mp_cmp_mag.o bn_mp_cnt_lsb.o bn_mp_complement.o bn_mp_copy.o bn_mp_count_bits.o bn_mp_decr.o \
bn_mp_div.o bn_mp_div_2.o bn_mp_div_2d.o bn_mp_div_3.o bn_mp_div_d.o bn_mp_dr_is_modulus.o \
bn_mp_addmod.o bn_mp_and.o bn_mp_balance_mul.o bn_mp_clamp.o bn_mp_clear.o bn_mp_clear_multi.o bn_mp_cmp.o \
bn_mp_cmp_d.o bn_mp_cmp_mag.o bn_mp_cnt_lsb.o bn_mp_complement.o bn_mp_copy.o bn_mp_count_bits.o \
bn_mp_decr.o bn_mp_div.o bn_mp_div_2.o bn_mp_div_2d.o bn_mp_div_3.o bn_mp_div_d.o bn_mp_dr_is_modulus.o \
bn_mp_dr_reduce.o bn_mp_dr_setup.o bn_mp_exch.o bn_mp_export.o bn_mp_expt_d.o bn_mp_expt_d_ex.o \
bn_mp_exptmod.o bn_mp_exptmod_fast.o bn_mp_exteuclid.o bn_mp_fread.o bn_mp_fwrite.o bn_mp_gcd.o \
bn_mp_get_bit.o bn_mp_get_double.o bn_mp_get_int.o bn_mp_get_long.o bn_mp_get_long_long.o bn_mp_grow.o \

View File

@ -23,9 +23,9 @@ LIBMAIN_S =tommath.lib
#List of objects to compile (all goes to tommath.lib)
OBJECTS=bn_error.obj bn_fast_mp_invmod.obj bn_fast_mp_montgomery_reduce.obj bn_fast_s_mp_mul_digs.obj \
bn_fast_s_mp_mul_high_digs.obj bn_fast_s_mp_sqr.obj bn_mp_2expt.obj bn_mp_abs.obj bn_mp_add.obj bn_mp_add_d.obj \
bn_mp_addmod.obj bn_mp_and.obj bn_mp_clamp.obj bn_mp_clear.obj bn_mp_clear_multi.obj bn_mp_cmp.obj bn_mp_cmp_d.obj \
bn_mp_cmp_mag.obj bn_mp_cnt_lsb.obj bn_mp_complement.obj bn_mp_copy.obj bn_mp_count_bits.obj bn_mp_decr.obj \
bn_mp_div.obj bn_mp_div_2.obj bn_mp_div_2d.obj bn_mp_div_3.obj bn_mp_div_d.obj bn_mp_dr_is_modulus.obj \
bn_mp_addmod.obj bn_mp_and.obj bn_mp_balance_mul.obj bn_mp_clamp.obj bn_mp_clear.obj bn_mp_clear_multi.obj bn_mp_cmp.obj \
bn_mp_cmp_d.obj bn_mp_cmp_mag.obj bn_mp_cnt_lsb.obj bn_mp_complement.obj bn_mp_copy.obj bn_mp_count_bits.obj \
bn_mp_decr.obj bn_mp_div.obj bn_mp_div_2.obj bn_mp_div_2d.obj bn_mp_div_3.obj bn_mp_div_d.obj bn_mp_dr_is_modulus.obj \
bn_mp_dr_reduce.obj bn_mp_dr_setup.obj bn_mp_exch.obj bn_mp_export.obj bn_mp_expt_d.obj bn_mp_expt_d_ex.obj \
bn_mp_exptmod.obj bn_mp_exptmod_fast.obj bn_mp_exteuclid.obj bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_gcd.obj \
bn_mp_get_bit.obj bn_mp_get_double.obj bn_mp_get_int.obj bn_mp_get_long.obj bn_mp_get_long_long.obj bn_mp_grow.obj \

View File

@ -25,9 +25,9 @@ LCOV_ARGS=--directory .libs --directory .
#START_INS
OBJECTS=bn_error.o bn_fast_mp_invmod.o bn_fast_mp_montgomery_reduce.o bn_fast_s_mp_mul_digs.o \
bn_fast_s_mp_mul_high_digs.o bn_fast_s_mp_sqr.o bn_mp_2expt.o bn_mp_abs.o bn_mp_add.o bn_mp_add_d.o \
bn_mp_addmod.o bn_mp_and.o bn_mp_clamp.o bn_mp_clear.o bn_mp_clear_multi.o bn_mp_cmp.o bn_mp_cmp_d.o \
bn_mp_cmp_mag.o bn_mp_cnt_lsb.o bn_mp_complement.o bn_mp_copy.o bn_mp_count_bits.o bn_mp_decr.o \
bn_mp_div.o bn_mp_div_2.o bn_mp_div_2d.o bn_mp_div_3.o bn_mp_div_d.o bn_mp_dr_is_modulus.o \
bn_mp_addmod.o bn_mp_and.o bn_mp_balance_mul.o bn_mp_clamp.o bn_mp_clear.o bn_mp_clear_multi.o bn_mp_cmp.o \
bn_mp_cmp_d.o bn_mp_cmp_mag.o bn_mp_cnt_lsb.o bn_mp_complement.o bn_mp_copy.o bn_mp_count_bits.o \
bn_mp_decr.o bn_mp_div.o bn_mp_div_2.o bn_mp_div_2d.o bn_mp_div_3.o bn_mp_div_d.o bn_mp_dr_is_modulus.o \
bn_mp_dr_reduce.o bn_mp_dr_setup.o bn_mp_exch.o bn_mp_export.o bn_mp_expt_d.o bn_mp_expt_d_ex.o \
bn_mp_exptmod.o bn_mp_exptmod_fast.o bn_mp_exteuclid.o bn_mp_fread.o bn_mp_fwrite.o bn_mp_gcd.o \
bn_mp_get_bit.o bn_mp_get_double.o bn_mp_get_int.o bn_mp_get_long.o bn_mp_get_long_long.o bn_mp_grow.o \

View File

@ -32,9 +32,9 @@ LIBMAIN_S = libtommath.a
OBJECTS=bn_error.o bn_fast_mp_invmod.o bn_fast_mp_montgomery_reduce.o bn_fast_s_mp_mul_digs.o \
bn_fast_s_mp_mul_high_digs.o bn_fast_s_mp_sqr.o bn_mp_2expt.o bn_mp_abs.o bn_mp_add.o bn_mp_add_d.o \
bn_mp_addmod.o bn_mp_and.o bn_mp_clamp.o bn_mp_clear.o bn_mp_clear_multi.o bn_mp_cmp.o bn_mp_cmp_d.o \
bn_mp_cmp_mag.o bn_mp_cnt_lsb.o bn_mp_complement.o bn_mp_copy.o bn_mp_count_bits.o bn_mp_decr.o \
bn_mp_div.o bn_mp_div_2.o bn_mp_div_2d.o bn_mp_div_3.o bn_mp_div_d.o bn_mp_dr_is_modulus.o \
bn_mp_addmod.o bn_mp_and.o bn_mp_balance_mul.o bn_mp_clamp.o bn_mp_clear.o bn_mp_clear_multi.o bn_mp_cmp.o \
bn_mp_cmp_d.o bn_mp_cmp_mag.o bn_mp_cnt_lsb.o bn_mp_complement.o bn_mp_copy.o bn_mp_count_bits.o \
bn_mp_decr.o bn_mp_div.o bn_mp_div_2.o bn_mp_div_2d.o bn_mp_div_3.o bn_mp_div_d.o bn_mp_dr_is_modulus.o \
bn_mp_dr_reduce.o bn_mp_dr_setup.o bn_mp_exch.o bn_mp_export.o bn_mp_expt_d.o bn_mp_expt_d_ex.o \
bn_mp_exptmod.o bn_mp_exptmod_fast.o bn_mp_exteuclid.o bn_mp_fread.o bn_mp_fwrite.o bn_mp_gcd.o \
bn_mp_get_bit.o bn_mp_get_double.o bn_mp_get_int.o bn_mp_get_long.o bn_mp_get_long_long.o bn_mp_grow.o \

View File

@ -335,6 +335,7 @@ int mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
/* c = a * b */
int mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
/* c = a * b, computed by cutting the larger factor into slices of the size of the smaller one */
int mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c);
/* b = a*a */
int mp_sqr(const mp_int *a, mp_int *b);

View File

@ -22,6 +22,7 @@
# define BN_MP_ADD_D_C
# define BN_MP_ADDMOD_C
# define BN_MP_AND_C
# define BN_MP_BALANCE_MUL_C
# define BN_MP_CLAMP_C
# define BN_MP_CLEAR_C
# define BN_MP_CLEAR_MULTI_C
@ -231,6 +232,17 @@
# define BN_MP_CLEAR_C
#endif
#if defined(BN_MP_BALANCE_MUL_C)
# define BN_MP_INIT_SIZE_C
# define BN_MP_INIT_MULTI_C
# define BN_MP_CLEAR_C
# define BN_MP_MUL_C
# define BN_MP_LSHD_C
# define BN_MP_ADD_C
# define BN_MP_EXCH_C
# define BN_MP_CLEAR_MULTI_C
#endif
#if defined(BN_MP_CLAMP_C)
#endif
@ -624,9 +636,10 @@
#endif
#if defined(BN_MP_MUL_C)
# define BN_FAST_S_MP_MUL_DIGS_C
# define BN_MP_BALANCE_MUL_C
# define BN_MP_TOOM_MUL_C
# define BN_MP_KARATSUBA_MUL_C
# define BN_FAST_S_MP_MUL_DIGS_C
# define BN_S_MP_MUL_C
# define BN_S_MP_MUL_DIGS_C
#endif