From 485c15c1ca4a581b3796ecbd8953bda28ead5c59 Mon Sep 17 00:00:00 2001 From: Jakob Kummerow Date: Thu, 15 Jul 2021 18:20:09 +0200 Subject: [PATCH] [bigint] Barrett-Newton division Dividing by first computing a multiplicative inverse is faster than Burnikel-Ziegler division for very large inputs. Bug: v8:11515 Change-Id: Ice45690c3fa4eef7102d418cdd3d82a942a076c5 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3015573 Commit-Queue: Jakob Kummerow Reviewed-by: Maya Lekova Cr-Commit-Position: refs/heads/master@{#75743} --- BUILD.bazel | 1 + BUILD.gn | 1 + src/bigint/bigint-internal.cc | 17 ++ src/bigint/bigint-internal.h | 10 + src/bigint/bigint.h | 9 +- src/bigint/div-barrett.cc | 378 +++++++++++++++++++++++++++++++++ src/bigint/mul-karatsuba.cc | 2 + src/bigint/vector-arithmetic.h | 3 + test/bigint/bigint-shell.cc | 74 ++++++- 9 files changed, 487 insertions(+), 8 deletions(-) create mode 100644 src/bigint/div-barrett.cc diff --git a/BUILD.bazel b/BUILD.bazel index 9583948835..17d96d42f2 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -2673,6 +2673,7 @@ filegroup( "src/bigint/bigint-internal.h", "src/bigint/bigint.h", "src/bigint/digit-arithmetic.h", + "src/bigint/div-barrett.cc", "src/bigint/div-burnikel.cc", "src/bigint/div-helpers.cc", "src/bigint/div-helpers.h", diff --git a/BUILD.gn b/BUILD.gn index d0a7a56847..b2cadd5713 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -4969,6 +4969,7 @@ v8_source_set("v8_bigint") { if (v8_advanced_bigint_algorithms) { sources += [ + "src/bigint/div-barrett.cc", "src/bigint/mul-fft.cc", "src/bigint/mul-toom.cc", ] diff --git a/src/bigint/bigint-internal.cc b/src/bigint/bigint-internal.cc index 828a450e8a..fc501f5a9a 100644 --- a/src/bigint/bigint-internal.cc +++ b/src/bigint/bigint-internal.cc @@ -58,7 +58,16 @@ void ProcessorImpl::Divide(RWDigits Q, Digits A, Digits B) { if (B.len() < kBurnikelThreshold) { return DivideSchoolbook(Q, RWDigits(nullptr, 0), A, B); } +#if !V8_ADVANCED_BIGINT_ALGORITHMS return DivideBurnikelZiegler(Q, RWDigits(nullptr, 0), A, B); +#else + if (B.len() < kBarrettThreshold || A.len() == B.len()) { + DivideBurnikelZiegler(Q, RWDigits(nullptr, 0), A, B); + } else { + ScratchDigits R(B.len()); + DivideBarrett(Q, R, A, B); + } +#endif } void ProcessorImpl::Modulo(RWDigits R, Digits A, Digits B) { @@ -84,7 +93,15 @@ void ProcessorImpl::Modulo(RWDigits R, Digits A, Digits B) { } int q_len = DivideResultLength(A, B); ScratchDigits Q(q_len); +#if !V8_ADVANCED_BIGINT_ALGORITHMS return DivideBurnikelZiegler(Q, R, A, B); +#else + if (B.len() < kBarrettThreshold || A.len() == B.len()) { + DivideBurnikelZiegler(Q, R, A, B); + } else { + DivideBarrett(Q, R, A, B); + } +#endif } Status Processor::Multiply(RWDigits Z, Digits X, Digits Y) { diff --git a/src/bigint/bigint-internal.h b/src/bigint/bigint-internal.h index 41ef9526e5..193f311f28 100644 --- a/src/bigint/bigint-internal.h +++ b/src/bigint/bigint-internal.h @@ -18,6 +18,8 @@ constexpr int kFftThreshold = 1500; constexpr int kFftInnerThreshold = 200; constexpr int kBurnikelThreshold = 57; +constexpr int kNewtonInversionThreshold = 50; +// kBarrettThreshold is defined in bigint.h. 
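+// (With 64-bit digits, kBarrettThreshold = 13310 corresponds to divisors of roughly 850,000 bits, so Barrett division only kicks in for very large inputs.)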
class ProcessorImpl : public Processor { public: @@ -47,6 +49,14 @@ class ProcessorImpl : public Processor { void Toom3Main(RWDigits Z, Digits X, Digits Y); void MultiplyFFT(RWDigits Z, Digits X, Digits Y); + + void DivideBarrett(RWDigits Q, RWDigits R, Digits A, Digits B); + void DivideBarrett(RWDigits Q, RWDigits R, Digits A, Digits B, Digits I, + RWDigits scratch); + + void Invert(RWDigits Z, Digits V, RWDigits scratch); + void InvertBasecase(RWDigits Z, Digits V, RWDigits scratch); + void InvertNewton(RWDigits Z, Digits V, RWDigits scratch); #endif // V8_ADVANCED_BIGINT_ALGORITHMS // {out_length} initially contains the allocated capacity of {out}, and diff --git a/src/bigint/bigint.h b/src/bigint/bigint.h index 6d3790808c..78ac9a9d05 100644 --- a/src/bigint/bigint.h +++ b/src/bigint/bigint.h @@ -274,8 +274,15 @@ inline int SubtractSignedResultLength(int x_length, int y_length, inline int MultiplyResultLength(Digits X, Digits Y) { return X.len() + Y.len(); } +constexpr int kBarrettThreshold = 13310; inline int DivideResultLength(Digits A, Digits B) { - return A.len() - B.len() + 1; +#if V8_ADVANCED_BIGINT_ALGORITHMS + // The Barrett division algorithm needs one extra digit for temporary use. + int kBarrettExtraScratch = B.len() >= kBarrettThreshold ? 1 : 0; +#else + constexpr int kBarrettExtraScratch = 0; +#endif + return A.len() - B.len() + 1 + kBarrettExtraScratch; } inline int ModuloResultLength(Digits B) { return B.len(); } diff --git a/src/bigint/div-barrett.cc b/src/bigint/div-barrett.cc new file mode 100644 index 0000000000..6947a5b68e --- /dev/null +++ b/src/bigint/div-barrett.cc @@ -0,0 +1,378 @@ +// Copyright 2021 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Barrett division, finding the inverse with Newton's method. +// Reference: "Fast Division of Large Integers" by Karl Hasselström, +// found at https://treskal.com/s/masters-thesis.pdf + +// Many thanks to Karl Wiberg, k@w5.se, for both writing up an +// understandable theoretical description of the algorithm and privately +// providing a demo implementation, on which the implementation in this file is +// based. + +#include <algorithm> + +#include "src/bigint/bigint-internal.h" +#include "src/bigint/digit-arithmetic.h" +#include "src/bigint/div-helpers.h" +#include "src/bigint/vector-arithmetic.h" + +namespace v8 { +namespace bigint { + +namespace { + +constexpr int DivideBarrettScratchSpace(int n) { return n + 2; } + +// Local values S and W need "n plus a few" digits; U needs 2*n "plus a few". +// In all tested cases the "few" were either 2 or 3, so give 5 to be safe. +// S and W are not live at the same time. +constexpr int kInvertNewtonExtraSpace = 5; +constexpr int InvertNewtonScratchSpace(int n) { + return 3 * n + 2 * kInvertNewtonExtraSpace; +} + +constexpr int InvertScratchSpace(int n) { + return n < kNewtonInversionThreshold ? 2 * n : InvertNewtonScratchSpace(n); +} + +void DcheckIntegerPartRange(Digits X, digit_t min, digit_t max) { +#if DEBUG + digit_t integer_part = X.msd(); + DCHECK(integer_part >= min); + DCHECK(integer_part <= max); +#else + USE(X); + USE(min); + USE(max); +#endif +} + +} // namespace + +// Z := (the fractional part of) 1/V, via naive division. +// See comments at {Invert} and {InvertNewton} below for details.
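+// Example with hypothetical 8-bit digits (the real digit size is 32 or 64 bits): for V = {0xC0}, this computes (2^16 - 0xC0*2^8) / 0xC0 = 0x4000 / 0xC0 = 0x55, i.e. 1/V is approximated by (0x100 + 0x55) / 2^16, where the leading 0x100 is the implicit top digit described at {Invert} below.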
+void ProcessorImpl::InvertBasecase(RWDigits Z, Digits V, RWDigits scratch) { + DCHECK(Z.len() > V.len()); + DCHECK(V.len() > 0); // NOLINT(readability/check) + DCHECK(scratch.len() >= 2 * V.len()); + int n = V.len(); + RWDigits X(scratch, 0, 2 * n); + digit_t borrow = 0; + int i = 0; + for (; i < n; i++) X[i] = 0; + for (; i < 2 * n; i++) X[i] = digit_sub2(0, V[i - n], borrow, &borrow); + DCHECK(borrow == 1); // NOLINT(readability/check) + RWDigits R(nullptr, 0); // We don't need the remainder. + if (n < kBurnikelThreshold) { + DivideSchoolbook(Z, R, X, V); + } else { + DivideBurnikelZiegler(Z, R, X, V); + } +} + +// This is Algorithm 4.2 from the paper. +// Computes the inverse of V, shifted by kDigitBits * 2 * V.len, accurate to +// V.len+1 digits. The V.len low digits of the result digits will be written +// to Z, plus there is an implicit top digit with value 1. +// Needs InvertNewtonScratchSpace(V.len) of scratch space. +// The result is either correct or off by one (about half the time it is +// correct, half the time it is one too much, and in the corner case where V is +// minimal and the implicit top digit would have to be 2 it is one too little). +// Barrett's division algorithm can handle that, so we don't care. +void ProcessorImpl::InvertNewton(RWDigits Z, Digits V, RWDigits scratch) { + const int vn = V.len(); + DCHECK(Z.len() >= vn); + DCHECK(scratch.len() >= InvertNewtonScratchSpace(vn)); + const int kSOffset = 0; + const int kWOffset = 0; // S and W can share their scratch space. + const int kUOffset = vn + kInvertNewtonExtraSpace; + + // The base case won't work otherwise. + DCHECK(V.len() >= 3); // NOLINT(readability/check) + + constexpr int kBasecasePrecision = kNewtonInversionThreshold - 1; + // V must have more digits than the basecase. + DCHECK(V.len() > kBasecasePrecision); + DCHECK(IsBitNormalized(V)); + + // Step (1): Setup. + // Calculate precision required at each step. + // {k} is the number of fraction bits for the current iteration. + int k = vn * kDigitBits; + int target_fraction_bits[8 * sizeof(vn)]; // "k_i" in the paper. + int iteration = -1; // "i" in the paper, except inverted to run downwards. + while (k > kBasecasePrecision * kDigitBits) { + iteration++; + target_fraction_bits[iteration] = k; + k = DIV_CEIL(k, 2); + } + // At this point, k <= kBasecasePrecision*kDigitBits is the number of + // fraction bits to use in the base case. {iteration} is the highest index + // in use for f[]. + + // Step (2): Initial approximation. + int initial_digits = DIV_CEIL(k + 1, kDigitBits); + Digits top_part_of_v(V, vn - initial_digits, initial_digits); + InvertBasecase(Z, top_part_of_v, scratch); + Z[initial_digits] = Z[initial_digits] + 1; // Implicit top digit. + // From now on, we'll keep Z.len updated to the part that's already computed. + Z.set_len(initial_digits + 1); + + // Step (3): Precision doubling loop. + while (true) { + DcheckIntegerPartRange(Z, 1, 2); + + // (3b): S = Z^2 + RWDigits S(scratch, kSOffset, 2 * Z.len()); + Multiply(S, Z, Z); + if (should_terminate()) return; + S.TrimOne(); // Top digit of S is unused. + DcheckIntegerPartRange(S, 1, 4); + + // (3c): T = V, truncated so that at least 2k+3 fraction bits remain. + int fraction_digits = DIV_CEIL(2 * k + 3, kDigitBits); + int t_len = std::min(V.len(), fraction_digits); + Digits T(V, V.len() - t_len, t_len); + + // (3d): U = T * S, truncated so that at least 2k+1 fraction bits remain + // (U has one integer digit, which might be zero). 
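+ // (Together with steps (3e) and (3f) below, this implements the Newton-Raphson update Z := 2*Z - V*Z^2 for approximating 1/V.)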
+ RWDigits U(scratch, kUOffset, S.len() + T.len()); + DCHECK(U.len() > fraction_digits); + Multiply(U, S, T); + if (should_terminate()) return; + U = U + (U.len() - (1 + fraction_digits)); + DcheckIntegerPartRange(U, 0, 3); + + // (3e): W = 2 * Z, padded with "0" fraction bits so that it has the + // same number of fraction bits as U. + DCHECK(U.len() >= Z.len()); + RWDigits W(scratch, kWOffset, U.len()); + int padding_digits = U.len() - Z.len(); + for (int i = 0; i < padding_digits; i++) W[i] = 0; + LeftShift(W + padding_digits, Z, 1); + DcheckIntegerPartRange(W, 2, 4); + + // (3f): Z = W - U. + // This check is '<=' instead of '<' because U's top digit is its + // integer part, and we want vn fraction digits. + if (U.len() <= vn) { + // Normal subtraction. + // This is not the last iteration. + DCHECK(iteration > 0); // NOLINT(readability/check) + Z.set_len(U.len()); + digit_t borrow = SubtractAndReturnBorrow(Z, W, U); + DCHECK(borrow == 0); // NOLINT(readability/check) + USE(borrow); + DcheckIntegerPartRange(Z, 1, 2); + } else { + // Truncate some least significant digits so that we get vn + // fraction digits, and compute the integer digit separately. + // This is the last iteration. + DCHECK(iteration == 0); // NOLINT(readability/check) + Z.set_len(vn); + Digits W_part(W, W.len() - vn - 1, vn); + Digits U_part(U, U.len() - vn - 1, vn); + digit_t borrow = SubtractAndReturnBorrow(Z, W_part, U_part); + digit_t integer_part = W.msd() - U.msd() - borrow; + DCHECK(integer_part == 1 || integer_part == 2); + if (integer_part == 2) { + // This is the rare case where the correct result would be 2.0, but + // since we can't express that by returning only the fractional part + // with an implicit 1-digit, we have to return [1.]9999... instead. + for (int i = 0; i < Z.len(); i++) Z[i] = ~digit_t{0}; + } + break; + } + // (3g, 3h): Update local variables and loop. + k = target_fraction_bits[iteration]; + iteration--; + } +} + +// Computes the inverse of V, shifted by kDigitBits * 2 * V.len, accurate to +// V.len+1 digits. The V.len low digits of the result digits will be written +// to Z, plus there is an implicit top digit with value 1. +// (Corner case: if V is minimal, the implicit digit should be 2; in that case +// we return one less than the correct answer. DivideBarrett can handle that.) +// Needs InvertScratchSpace(V.len) digits of scratch space. +void ProcessorImpl::Invert(RWDigits Z, Digits V, RWDigits scratch) { + DCHECK(Z.len() > V.len()); + DCHECK(V.len() >= 1); // NOLINT(readability/check) + DCHECK(IsBitNormalized(V)); + DCHECK(scratch.len() >= InvertScratchSpace(V.len())); + + int vn = V.len(); + if (vn >= kNewtonInversionThreshold) { + return InvertNewton(Z, V, scratch); + } + if (vn == 1) { + digit_t d = V[0]; + digit_t dummy_remainder; + Z[0] = digit_div(~d, ~digit_t{0}, d, &dummy_remainder); + Z[1] = 0; + } else { + InvertBasecase(Z, V, scratch); + if (Z[vn] == 1) { + for (int i = 0; i < vn; i++) Z[i] = ~digit_t{0}; + } + } +} + +// This is algorithm 3.5 from the paper. +// Computes Q(uotient) and R(emainder) for A/B using I, which is a +// precomputed approximation of 1/B (e.g. with Invert() above). +// Needs DivideBarrettScratchSpace(A.len) scratch space. +void ProcessorImpl::DivideBarrett(RWDigits Q, RWDigits R, Digits A, Digits B, + Digits I, RWDigits scratch) { + DCHECK(Q.len() > A.len() - B.len()); + DCHECK(R.len() >= B.len()); + DCHECK(A.len() > B.len()); // Careful: This is *not* '>=' ! 
+ DCHECK(A.len() <= 2 * B.len()); + DCHECK(B.len() > 0); // NOLINT(readability/check) + DCHECK(IsBitNormalized(B)); + DCHECK(I.len() == A.len() - B.len()); + DCHECK(scratch.len() >= DivideBarrettScratchSpace(A.len())); + + int orig_q_len = Q.len(); + + // (1): A1 = A with B.len fewer digits. + Digits A1 = A + B.len(); + DCHECK(A1.len() == I.len()); + + // (2): Q = A1*I with I.len fewer digits. + // {I} has an implicit high digit with value 1, so we add {A1} to the high + // part of the multiplication result. + RWDigits K(scratch, 0, 2 * I.len()); + Multiply(K, A1, I); + if (should_terminate()) return; + Q.set_len(I.len() + 1); + Add(Q, K + I.len(), A1); + // K is no longer used, can re-use {scratch} for P. + + // (3): R = A - B*Q (approximate remainder). + RWDigits P(scratch, 0, A.len() + 1); + Multiply(P, B, Q); + if (should_terminate()) return; + digit_t borrow = SubtractAndReturnBorrow(R, A, Digits(P, 0, B.len())); + // R may be allocated wider than B, zero out any extra digits if so. + for (int i = B.len(); i < R.len(); i++) R[i] = 0; + digit_t r_high = A[B.len()] - P[B.len()] - borrow; + + // Adjust R and Q so that they become the correct remainder and quotient. + // The number of iterations is guaranteed to be at most some very small + // constant, unless the caller gave us a bad approximate quotient. + if (r_high >> (kDigitBits - 1) == 1) { + // (5b): R < 0, so R += B + digit_t q_sub = 0; + do { + r_high += AddAndReturnCarry(R, R, B); + q_sub++; + DCHECK(q_sub <= 5); // NOLINT(readability/check) + } while (r_high != 0); + Subtract(Q, q_sub); + } else { + digit_t q_add = 0; + while (r_high != 0 || GreaterThanOrEqual(R, B)) { + // (5c): R >= B, so R -= B + r_high -= SubtractAndReturnBorrow(R, R, B); + q_add++; + DCHECK(q_add <= 5); // NOLINT(readability/check) + } + Add(Q, q_add); + } + // (5a): Return. + int final_q_len = Q.len(); + Q.set_len(orig_q_len); + for (int i = final_q_len; i < orig_q_len; i++) Q[i] = 0; +} + +// Computes Q(uotient) and R(emainder) for A/B, using Barrett division. +void ProcessorImpl::DivideBarrett(RWDigits Q, RWDigits R, Digits A, Digits B) { + DCHECK(Q.len() > A.len() - B.len()); + DCHECK(R.len() >= B.len()); + DCHECK(A.len() > B.len()); // Careful: This is *not* '>=' ! + DCHECK(B.len() > 0); // NOLINT(readability/check) + + // Normalize B, and shift A by the same amount. + ShiftedDigits b_normalized(B); + ShiftedDigits a_normalized(A, b_normalized.shift()); + // Keep the code below more concise. + B = b_normalized; + A = a_normalized; + + // The core DivideBarrett function above only supports A having at most + // twice as many digits as B. We generalize this to arbitrary inputs + // similar to Burnikel-Ziegler division by performing a t-by-1 division + // of B-sized chunks. It's easy to special-case the situation where we + // don't need to bother. + int barrett_dividend_length = A.len() <= 2 * B.len() ? A.len() : 2 * B.len(); + int i_len = barrett_dividend_length - B.len(); + ScratchDigits I(i_len + 1); // +1 is for temporary use by Invert(). + int scratch_len = + std::max(InvertScratchSpace(i_len), + DivideBarrettScratchSpace(barrett_dividend_length)); + ScratchDigits scratch(scratch_len); + Invert(I, Digits(B, B.len() - i_len, i_len), scratch); + if (should_terminate()) return; + I.TrimOne(); + DCHECK(I.len() == i_len); + if (A.len() > 2 * B.len()) { + // This follows the variable names and algorithmic steps of + // DivideBurnikelZiegler(). + int n = B.len(); // Chunk length. + // (5): {t} is the number of B-sized chunks of A.
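+ // (For example, if A has 5*n digits, then t = 5 and the code below performs four 2n-by-n divisions, producing quotient chunks Q_3 through Q_0.)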
+ int t = DIV_CEIL(A.len(), n); + DCHECK(t >= 3); // NOLINT(readability/check) + // (6)/(7): Z is used for the current 2-chunk block to be divided by B, + // initialized to the two topmost chunks of A. + int z_len = n * 2; + ScratchDigits Z(z_len); + PutAt(Z, A + n * (t - 2), z_len); + // (8): For i from t-2 downto 0 do + int qi_len = n + 1; + ScratchDigits Qi(qi_len); + ScratchDigits Ri(n); + // First iteration unrolled and specialized. + { + int i = t - 2; + DivideBarrett(Qi, Ri, Z, B, I, scratch); + if (should_terminate()) return; + RWDigits target = Q + n * i; + // In the first iteration, all qi_len = n + 1 digits may be used. + int to_copy = std::min(qi_len, target.len()); + for (int j = 0; j < to_copy; j++) target[j] = Qi[j]; + for (int j = to_copy; j < target.len(); j++) target[j] = 0; +#if DEBUG + for (int j = to_copy; j < Qi.len(); j++) { + DCHECK(Qi[j] == 0); // NOLINT(readability/check) + } +#endif + } + // Now loop over any remaining iterations. + for (int i = t - 3; i >= 0; i--) { + // (8b): If i > 0, set Z_(i-1) = [Ri, A_(i-1)]. + // (De-duped with unrolled first iteration, hence reading A_(i).) + PutAt(Z + n, Ri, n); + PutAt(Z, A + n * i, n); + // (8a): Compute Qi, Ri such that Zi = B*Qi + Ri. + DivideBarrett(Qi, Ri, Z, B, I, scratch); + DCHECK(Qi[qi_len - 1] == 0); // NOLINT(readability/check) + if (should_terminate()) return; + // (9): Return Q = [Q_(t-2), ..., Q_0]... + PutAt(Q + n * i, Qi, n); + } + Ri.Normalize(); + DCHECK(Ri.len() <= R.len()); + // (9): ...and R = R_0 * 2^(-leading_zeros). + RightShift(R, Ri, b_normalized.shift()); + } else { + DivideBarrett(Q, R, A, B, I, scratch); + if (should_terminate()) return; + RightShift(R, R, b_normalized.shift()); + } +} + +} // namespace bigint +} // namespace v8 diff --git a/src/bigint/mul-karatsuba.cc b/src/bigint/mul-karatsuba.cc index 2a141f213c..d4b5a58383 100644 --- a/src/bigint/mul-karatsuba.cc +++ b/src/bigint/mul-karatsuba.cc @@ -201,5 +201,7 @@ void ProcessorImpl::KaratsubaMain(RWDigits Z, Digits X, Digits Y, USE(overflow); } +#undef MAYBE_TERMINATE + } // namespace bigint } // namespace v8 diff --git a/src/bigint/vector-arithmetic.h b/src/bigint/vector-arithmetic.h index 3247660f95..d8b79a961a 100644 --- a/src/bigint/vector-arithmetic.h +++ b/src/bigint/vector-arithmetic.h @@ -45,6 +45,9 @@ digit_t AddAndReturnCarry(RWDigits Z, Digits X, Digits Y); digit_t SubtractAndReturnBorrow(RWDigits Z, Digits X, Digits Y); inline bool IsDigitNormalized(Digits X) { return X.len() == 0 || X.msd() != 0; } +inline bool IsBitNormalized(Digits X) { + return (X.msd() >> (kDigitBits - 1)) == 1; +} inline bool GreaterThanOrEqual(Digits A, Digits B) { return Compare(A, B) >= 0; diff --git a/test/bigint/bigint-shell.cc b/test/bigint/bigint-shell.cc index b524944625..593bc47dc1 100644 --- a/test/bigint/bigint-shell.cc +++ b/test/bigint/bigint-shell.cc @@ -28,8 +28,11 @@ int PrintHelp(char** argv) { } #define TESTS(V) \ + V(kBarrett, "barrett") \ + V(kBurnikel, "burnikel") \ + V(kFFT, "fft") \ V(kKaratsuba, "karatsuba") \ - V(kBurnikel, "burnikel") V(kToom, "toom") V(kFFT, "fft") + V(kToom, "toom") enum Operation { kNoOp, kList, kTest }; @@ -157,22 +160,26 @@ class Runner { int RunTest() { int count = 0; - if (test_ == kKaratsuba) { + if (test_ == kBarrett) { for (int i = 0; i < runs_; i++) { - TestKaratsuba(&count); + TestBarrett(&count); } } else if (test_ == kBurnikel) { for (int i = 0; i < runs_; i++) { TestBurnikel(&count); } - } else if (test_ == kToom) { - for (int i = 0; i < runs_; i++) { - TestToom(&count); - } } else if (test_ 
== kFFT) { for (int i = 0; i < runs_; i++) { TestFFT(&count); } + } else if (test_ == kKaratsuba) { + for (int i = 0; i < runs_; i++) { + TestKaratsuba(&count); + } + } else if (test_ == kToom) { + for (int i = 0; i < runs_; i++) { + TestToom(&count); + } } else { DCHECK(false); // Unreachable. } @@ -291,6 +298,56 @@ class Runner { } } +#if V8_ADVANCED_BIGINT_ALGORITHMS + void TestBarrett_Internal(int left_size, int right_size) { + ScratchDigits A(left_size); + ScratchDigits B(right_size); + GenerateRandom(A); + GenerateRandom(B); + int quotient_len = DivideResultLength(A, B); + // {DivideResultLength} doesn't expect to be called for sizes below + // {kBarrettThreshold} (which we do here to save time), so we have to + // manually adjust the allocated result length. + if (B.len() < kBarrettThreshold) quotient_len++; + int remainder_len = right_size; + ScratchDigits quotient(quotient_len); + ScratchDigits quotient_burnikel(quotient_len); + ScratchDigits remainder(remainder_len); + ScratchDigits remainder_burnikel(remainder_len); + processor()->DivideBarrett(quotient, remainder, A, B); + processor()->DivideBurnikelZiegler(quotient_burnikel, remainder_burnikel, A, + B); + AssertEquals(A, B, quotient_burnikel, quotient); + AssertEquals(A, B, remainder_burnikel, remainder); + } + + void TestBarrett(int* count) { + // We pick a range around kBurnikelThreshold (instead of kBarrettThreshold) + // to save test execution time. + constexpr int kMin = kBurnikelThreshold / 2; + constexpr int kMax = 2 * kBurnikelThreshold; + // {DivideBarrett(A, B)} requires that A.len > B.len! + for (int right_size = kMin; right_size <= kMax; right_size++) { + for (int left_size = right_size + 1; left_size <= kMax; left_size++) { + TestBarrett_Internal(left_size, right_size); + if (error_) return; + (*count)++; + } + } + // We also test one random large case. + uint64_t random_bits = rng_.NextUint64(); + int right_size = kBarrettThreshold + static_cast<int>(random_bits & 0x3FF); + random_bits >>= 10; + int left_size = right_size + 1 + static_cast<int>(random_bits & 0x3FFF); + random_bits >>= 14; + TestBarrett_Internal(left_size, right_size); + if (error_) return; + (*count)++; + } +#else + void TestBarrett(int* count) {} +#endif // V8_ADVANCED_BIGINT_ALGORITHMS + int ParseOptions(int argc, char** argv) { for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "--list") == 0) { @@ -325,6 +382,9 @@ class Runner { } private: + // TODO(jkummerow): Also generate "non-random-looking" inputs, i.e. long + // strings of zeros and ones in the binary representation, such as + // ((1 << random) ± 1). void GenerateRandom(RWDigits Z) { if (Z.len() == 0) return; if (sizeof(digit_t) == 8) {