diff --git a/mp_div.c b/mp_div.c index cbc52e8..b092d7b 100644 --- a/mp_div.c +++ b/mp_div.c @@ -26,7 +26,7 @@ mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) } if (MP_HAS(S_MP_DIV_RECURSIVE) - && (b->used > MP_MUL_KARATSUBA_CUTOFF) + && (b->used > (2 * MP_MUL_KARATSUBA_CUTOFF)) && (b->used <= ((a->used/3)*2))) { err = s_mp_div_recursive(a, b, c, d); } else if (MP_HAS(S_MP_DIV_SCHOOL)) { diff --git a/s_mp_div_recursive.c b/s_mp_div_recursive.c index df9a297..d719c4e 100644 --- a/s_mp_div_recursive.c +++ b/s_mp_div_recursive.c @@ -20,7 +20,7 @@ static mp_err s_recursion(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r mp_int A1, A2, B1, B0, Q1, Q0, R1, R0, t; int m = a->used - b->used, k = m/2; - if (m < MP_MUL_KARATSUBA_CUTOFF) { + if (m < (MP_MUL_KARATSUBA_CUTOFF)) { return s_mp_div_school(a, b, q, r); } @@ -43,15 +43,16 @@ static mp_err s_recursion(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r if ((err = mp_sub(&A1, &t, &A1)) != MP_OKAY) goto LBL_ERR; /* while A1 < 0 do Q1 = Q1 - 1, A1 = A1 + (beta^k * B) */ - if ((err = mp_mul_2d(b, k * MP_DIGIT_BIT, &t)) != MP_OKAY) goto LBL_ERR; - while (mp_cmp_d(&A1, 0uL) == MP_LT) { - if ((err = mp_decr(&Q1)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_add(&A1, &t, &A1)) != MP_OKAY) goto LBL_ERR; + if (mp_cmp_d(&A1, 0uL) == MP_LT) { + if ((err = mp_mul_2d(b, k * MP_DIGIT_BIT, &t)) != MP_OKAY) goto LBL_ERR; + do { + if ((err = mp_decr(&Q1)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add(&A1, &t, &A1)) != MP_OKAY) goto LBL_ERR; + } while (mp_cmp_d(&A1, 0uL) == MP_LT); } - /* (Q0, R0) = RecursiveDivRem(A1 / beta^(k), B1) */ if ((err = mp_div_2d(&A1, k * MP_DIGIT_BIT, &A1, &t)) != MP_OKAY) goto LBL_ERR; - if ((err = s_recursion(&A1, &B1, &Q0, &R0)) != MP_OKAY) goto LBL_ERR; + if ((err = s_recursion(&A1, &B1, &Q0, &R0)) != MP_OKAY) goto LBL_ERR; /* A2 = (R0*beta^k) + (A1 % beta^k) - (Q0*B0) */ if ((err = mp_lshd(&R0, k)) != MP_OKAY) goto LBL_ERR; @@ -65,13 +66,10 @@ static mp_err s_recursion(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r if ((err = mp_add(&A2, b, &A2)) != MP_OKAY) goto LBL_ERR; } /* return q = (Q1*beta^k) + Q0, r = A2 */ - if (q != NULL) { - if ((err = mp_lshd(&Q1, k)) != MP_OKAY) goto LBL_ERR; - if ((err = mp_add(&Q1, &Q0, q)) != MP_OKAY) goto LBL_ERR; - } - if (r != NULL) { - if ((err = mp_copy(&A2, r)) != MP_OKAY) goto LBL_ERR; - } + if ((err = mp_lshd(&Q1, k)) != MP_OKAY) goto LBL_ERR; + if ((err = mp_add(&Q1, &Q0, q)) != MP_OKAY) goto LBL_ERR; + + if ((err = mp_copy(&A2, r)) != MP_OKAY) goto LBL_ERR; LBL_ERR: mp_clear_multi(&A1, &A2, &B1, &B0, &Q1, &Q0, &R1, &R0, &t, NULL); @@ -94,24 +92,7 @@ mp_err s_mp_div_recursive(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r /* most significant bit of a limb */ /* assumes MP_DIGIT_MAX < (sizeof(mp_digit) * CHAR_BIT) */ msb = (MP_DIGIT_MAX + (mp_digit)(1)) >> 1; - - /* - Method to compute sigma shamelessly stolen from - - J. Burnikel and J. Ziegler, "Fast recursive division", Research Report - MPI-I-98-1-022, Max-Planck-Institut fuer Informatik, Saarbruecken, Germany, - October 1998. (available online) - - Vid. section 2.3. - */ - m = MP_MUL_KARATSUBA_CUTOFF; - while (m <= b->used) { - m <<= 1; - } - j = (b->used + m - 1) / m; - n = j * m; - - sigma = MP_DIGIT_BIT * (n - b->used); + sigma = 0; msb_b = b->dp[b->used - 1]; while (msb_b < msb) { sigma++; @@ -139,7 +120,7 @@ mp_err s_mp_div_recursive(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r /* Q = 0 */ mp_zero(&Q); while (m > n) { - /* (q, r) = RecursveDivRem(A / (beta^(m-n)), B) */ + /* (q, r) = RecursiveDivRem(A / (beta^(m-n)), B) */ j = (m - n) * MP_DIGIT_BIT; if ((err = mp_div_2d(&A, j, &A_div, &A_mod)) != MP_OKAY) goto LBL_ERR; if ((err = s_recursion(&A_div, &B, &Q1, &R)) != MP_OKAY) goto LBL_ERR; @@ -152,7 +133,7 @@ mp_err s_mp_div_recursive(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r /* m = m - n */ m = m - n; } - /* (q, r) = RecursveDivRem(A, B) */ + /* (q, r) = RecursiveDivRem(A, B) */ if ((err = s_recursion(&A, &B, &Q1, &R)) != MP_OKAY) goto LBL_ERR; /* Q = (Q * beta^m) + q, R = r */ if ((err = mp_mul_2d(&Q, m * MP_DIGIT_BIT, &Q)) != MP_OKAY) goto LBL_ERR;