mirror of
https://github.com/fmtlib/fmt.git
synced 2024-11-29 13:21:05 +00:00
Reflect the new paper
- Change constants appearing in log & division computations - Rename beta_minus_1 to beta
This commit is contained in:
parent
8e2e4d4034
commit
5d8eb6a1a0
@ -149,9 +149,6 @@ template <> FMT_FUNC int count_digits<4>(detail::fallback_uintptr n) {
|
||||
return i >= 0 ? i * char_digits + count_digits<4, unsigned>(n.value[i]) : 1;
|
||||
}
|
||||
|
||||
// log10(2) = 0x0.4d104d427de7fbcc...
|
||||
static constexpr uint64_t log10_2_significand = 0x4d104d427de7fbcc;
|
||||
|
||||
template <typename T = void> struct basic_impl_data {
|
||||
// Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340.
|
||||
// These are generated by support/compute-powers.py.
|
||||
@ -895,86 +892,72 @@ inline uint64_t umul96_lower64(uint32_t x, uint64_t y) noexcept {
|
||||
// Computes floor(log10(pow(2, e))) for e in [-1700, 1700] using the method from
|
||||
// https://fmt.dev/papers/Grisu-Exact.pdf#page=5, section 3.4.
|
||||
inline int floor_log10_pow2(int e) noexcept {
|
||||
FMT_ASSERT(e <= 1700 && e >= -1700, "too large exponent");
|
||||
FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent");
|
||||
static_assert((-1 >> 1) == -1, "right shift is not arithmetic");
|
||||
const int shift = 22;
|
||||
return (e * static_cast<int>(log10_2_significand >> (64 - shift))) >> shift;
|
||||
return (e * 315653) >> 20;
|
||||
}
|
||||
|
||||
// Various fast log computations.
|
||||
inline int floor_log2_pow10(int e) noexcept {
|
||||
FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent");
|
||||
const uint64_t log2_10_integer_part = 3;
|
||||
const uint64_t log2_10_fractional_digits = 0x5269e12f346e2bf9;
|
||||
const int shift_amount = 19;
|
||||
return (e * static_cast<int>(
|
||||
(log2_10_integer_part << shift_amount) |
|
||||
(log2_10_fractional_digits >> (64 - shift_amount)))) >>
|
||||
shift_amount;
|
||||
return (e * 1741647) >> 19;
|
||||
}
|
||||
inline int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept {
|
||||
FMT_ASSERT(e <= 1700 && e >= -1700, "too large exponent");
|
||||
const uint64_t log10_4_over_3_fractional_digits = 0x1ffbfc2bbc780375;
|
||||
const int shift_amount = 22;
|
||||
return (e * static_cast<int>(log10_2_significand >> (64 - shift_amount)) -
|
||||
static_cast<int>(log10_4_over_3_fractional_digits >>
|
||||
(64 - shift_amount))) >>
|
||||
shift_amount;
|
||||
FMT_ASSERT(e <= 2936 && e >= -2985, "too large exponent");
|
||||
return (e * 631305 - 261663) >> 21;
|
||||
}
|
||||
|
||||
static constexpr struct {
|
||||
int divisor;
|
||||
int shift_amount;
|
||||
} div_small_pow10_infos[] = {{10, 16}, {100, 16}};
|
||||
|
||||
// Replaces n by floor(n / pow(10, N)) returning true if and only if n is
|
||||
// divisible by pow(10, N).
|
||||
// Precondition: n <= pow(10, N + 1).
|
||||
template <int N>
|
||||
bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept {
|
||||
// The numbers below are chosen such that:
|
||||
// 1. floor(n/d) = floor(nm / 2^(k+l)) where d=10 or d=100,
|
||||
// 2. floor(nm/2^k) mod 2^l = 0 if and only if n is divisible by d,
|
||||
// where m is magic_number, k is margin_bits, l is divisibility_check_bits
|
||||
// 1. floor(n/d) = floor(nm / 2^k) where d=10 or d=100,
|
||||
// 2. nm mod 2^k < m if and only if n is divisible by d,
|
||||
// where m is magic_number, k is shift_amount
|
||||
// and d is divisor.
|
||||
//
|
||||
// Item 1 is a common technique of replacing division by a constant with
|
||||
// multiplication, see e.g. "Division by Invariant Integers Using
|
||||
// Multiplication" by Granlund and Montgomery (1994). magic_number (m) is set
|
||||
// to ceil(2^(k+l)/d) for large enough k+l.
|
||||
// to ceil(2^k/d) for large enough k.
|
||||
// The idea for item 2 originates from Schubfach.
|
||||
static constexpr struct {
|
||||
int divisor;
|
||||
int margin_bits;
|
||||
int divisibility_check_bits;
|
||||
} infos[] = {{10, 8, 8}, {100, 10, 16}};
|
||||
constexpr auto info = infos[N - 1];
|
||||
constexpr auto info = div_small_pow10_infos[N - 1];
|
||||
FMT_ASSERT(n <= info.divisor * 10, "n is too large");
|
||||
constexpr uint32_t magic_number =
|
||||
(1 << (info.margin_bits + info.divisibility_check_bits)) / info.divisor +
|
||||
1;
|
||||
(1u << info.shift_amount) / info.divisor + 1;
|
||||
n *= magic_number;
|
||||
n >>= info.margin_bits;
|
||||
const uint32_t comparison_mask = (1u << info.divisibility_check_bits) - 1;
|
||||
bool result = (n & comparison_mask) == 0;
|
||||
n >>= info.divisibility_check_bits;
|
||||
const uint32_t comparison_mask = (1u << info.shift_amount) - 1;
|
||||
bool result = (n & comparison_mask) < magic_number;
|
||||
n >>= info.shift_amount;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Computes floor(n / pow(10, N)) for small n and N.
|
||||
// Precondition: n <= pow(10, N + 1).
|
||||
template <int N> uint32_t small_division_by_pow10(uint32_t n) noexcept {
|
||||
static constexpr struct {
|
||||
uint32_t magic_number;
|
||||
int shift_amount;
|
||||
uint32_t divisor_times_10;
|
||||
} infos[] = {{0xcccd, 19, 100}, {0xa3d8, 22, 1000}};
|
||||
constexpr auto info = infos[N - 1];
|
||||
FMT_ASSERT(n <= info.divisor_times_10, "n is too large");
|
||||
return n * info.magic_number >> info.shift_amount;
|
||||
constexpr auto info = div_small_pow10_infos[N - 1];
|
||||
FMT_ASSERT(n <= info.divisor * 10, "n is too large");
|
||||
constexpr uint32_t magic_number =
|
||||
(1u << info.divisibility_check_bits) / info.divisor + 1;
|
||||
return (n * magic_number) >> info.shift_amount;
|
||||
}
|
||||
|
||||
// Computes floor(n / 10^(kappa + 1)) (float)
|
||||
inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) noexcept {
|
||||
return n / float_info<float>::big_divisor;
|
||||
// 1374389535 = ceil(2^37/100)
|
||||
return (static_cast<uint64_t>(n) * 1374389535) >> 37;
|
||||
}
|
||||
// Computes floor(n / 10^(kappa + 1)) (double)
|
||||
inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) noexcept {
|
||||
return umul128_upper64(n, 0x83126e978d4fdf3c) >> 9;
|
||||
// 2361183241434822607 = ceil(2^(64+7)/1000)
|
||||
return umul128_upper64(n, 2361183241434822607ull) >> 7;
|
||||
}
|
||||
|
||||
// Various subroutines using pow10 cache
|
||||
@ -1034,40 +1017,39 @@ template <> struct cache_accessor<float> {
|
||||
}
|
||||
|
||||
static uint32_t compute_delta(const cache_entry_type& cache,
|
||||
int beta_minus_1) noexcept {
|
||||
return static_cast<uint32_t>(cache >> (64 - 1 - beta_minus_1));
|
||||
int beta) noexcept {
|
||||
return static_cast<uint32_t>(cache >> (64 - 1 - beta));
|
||||
}
|
||||
|
||||
static compute_mul_parity_result compute_mul_parity(
|
||||
carrier_uint two_f, const cache_entry_type& cache,
|
||||
int beta_minus_1) noexcept {
|
||||
FMT_ASSERT(beta_minus_1 >= 1, "");
|
||||
FMT_ASSERT(beta_minus_1 < 64, "");
|
||||
carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept {
|
||||
FMT_ASSERT(beta >= 1, "");
|
||||
FMT_ASSERT(beta < 64, "");
|
||||
|
||||
auto r = umul96_lower64(two_f, cache);
|
||||
return {((r >> (64 - beta_minus_1)) & 1) != 0,
|
||||
static_cast<uint32_t>(r >> (32 - beta_minus_1)) == 0};
|
||||
return {((r >> (64 - beta)) & 1) != 0,
|
||||
static_cast<uint32_t>(r >> (32 - beta)) == 0};
|
||||
}
|
||||
|
||||
static carrier_uint compute_left_endpoint_for_shorter_interval_case(
|
||||
const cache_entry_type& cache, int beta_minus_1) noexcept {
|
||||
const cache_entry_type& cache, int beta) noexcept {
|
||||
return static_cast<carrier_uint>(
|
||||
(cache - (cache >> (float_info<float>::significand_bits + 2))) >>
|
||||
(64 - float_info<float>::significand_bits - 1 - beta_minus_1));
|
||||
(64 - float_info<float>::significand_bits - 1 - beta));
|
||||
}
|
||||
|
||||
static carrier_uint compute_right_endpoint_for_shorter_interval_case(
|
||||
const cache_entry_type& cache, int beta_minus_1) noexcept {
|
||||
const cache_entry_type& cache, int beta) noexcept {
|
||||
return static_cast<carrier_uint>(
|
||||
(cache + (cache >> (float_info<float>::significand_bits + 1))) >>
|
||||
(64 - float_info<float>::significand_bits - 1 - beta_minus_1));
|
||||
(64 - float_info<float>::significand_bits - 1 - beta));
|
||||
}
|
||||
|
||||
static carrier_uint compute_round_up_for_shorter_interval_case(
|
||||
const cache_entry_type& cache, int beta_minus_1) noexcept {
|
||||
const cache_entry_type& cache, int beta) noexcept {
|
||||
return (static_cast<carrier_uint>(
|
||||
cache >>
|
||||
(64 - float_info<float>::significand_bits - 2 - beta_minus_1)) +
|
||||
(64 - float_info<float>::significand_bits - 2 - beta)) +
|
||||
1) /
|
||||
2;
|
||||
}
|
||||
@ -1794,40 +1776,38 @@ template <> struct cache_accessor<double> {
|
||||
}
|
||||
|
||||
static uint32_t compute_delta(cache_entry_type const& cache,
|
||||
int beta_minus_1) noexcept {
|
||||
return static_cast<uint32_t>(cache.high() >> (64 - 1 - beta_minus_1));
|
||||
int beta) noexcept {
|
||||
return static_cast<uint32_t>(cache.high() >> (64 - 1 - beta));
|
||||
}
|
||||
|
||||
static compute_mul_parity_result compute_mul_parity(
|
||||
carrier_uint two_f, const cache_entry_type& cache,
|
||||
int beta_minus_1) noexcept {
|
||||
FMT_ASSERT(beta_minus_1 >= 1, "");
|
||||
FMT_ASSERT(beta_minus_1 < 64, "");
|
||||
carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept {
|
||||
FMT_ASSERT(beta >= 1, "");
|
||||
FMT_ASSERT(beta < 64, "");
|
||||
|
||||
auto r = umul192_lower128(two_f, cache);
|
||||
return {
|
||||
((r.high() >> (64 - beta_minus_1)) & 1) != 0,
|
||||
((r.high() << beta_minus_1) | (r.low() >> (64 - beta_minus_1))) == 0};
|
||||
return {((r.high() >> (64 - beta)) & 1) != 0,
|
||||
((r.high() << beta) | (r.low() >> (64 - beta))) == 0};
|
||||
}
|
||||
|
||||
static carrier_uint compute_left_endpoint_for_shorter_interval_case(
|
||||
const cache_entry_type& cache, int beta_minus_1) noexcept {
|
||||
const cache_entry_type& cache, int beta) noexcept {
|
||||
return (cache.high() -
|
||||
(cache.high() >> (float_info<double>::significand_bits + 2))) >>
|
||||
(64 - float_info<double>::significand_bits - 1 - beta_minus_1);
|
||||
(64 - float_info<double>::significand_bits - 1 - beta);
|
||||
}
|
||||
|
||||
static carrier_uint compute_right_endpoint_for_shorter_interval_case(
|
||||
const cache_entry_type& cache, int beta_minus_1) noexcept {
|
||||
const cache_entry_type& cache, int beta) noexcept {
|
||||
return (cache.high() +
|
||||
(cache.high() >> (float_info<double>::significand_bits + 1))) >>
|
||||
(64 - float_info<double>::significand_bits - 1 - beta_minus_1);
|
||||
(64 - float_info<double>::significand_bits - 1 - beta);
|
||||
}
|
||||
|
||||
static carrier_uint compute_round_up_for_shorter_interval_case(
|
||||
const cache_entry_type& cache, int beta_minus_1) noexcept {
|
||||
const cache_entry_type& cache, int beta) noexcept {
|
||||
return ((cache.high() >>
|
||||
(64 - float_info<double>::significand_bits - 2 - beta_minus_1)) +
|
||||
(64 - float_info<double>::significand_bits - 2 - beta)) +
|
||||
1) /
|
||||
2;
|
||||
}
|
||||
@ -1962,16 +1942,16 @@ FMT_INLINE decimal_fp<T> shorter_interval_case(int exponent) noexcept {
|
||||
decimal_fp<T> ret_value;
|
||||
// Compute k and beta
|
||||
const int minus_k = floor_log10_pow2_minus_log10_4_over_3(exponent);
|
||||
const int beta_minus_1 = exponent + floor_log2_pow10(-minus_k);
|
||||
const int beta = exponent + floor_log2_pow10(-minus_k);
|
||||
|
||||
// Compute xi and zi
|
||||
using cache_entry_type = typename cache_accessor<T>::cache_entry_type;
|
||||
const cache_entry_type cache = cache_accessor<T>::get_cached_power(-minus_k);
|
||||
|
||||
auto xi = cache_accessor<T>::compute_left_endpoint_for_shorter_interval_case(
|
||||
cache, beta_minus_1);
|
||||
cache, beta);
|
||||
auto zi = cache_accessor<T>::compute_right_endpoint_for_shorter_interval_case(
|
||||
cache, beta_minus_1);
|
||||
cache, beta);
|
||||
|
||||
// If the left endpoint is not an integer, increase it
|
||||
if (!is_left_endpoint_integer_shorter_interval<T>(exponent)) ++xi;
|
||||
@ -1988,8 +1968,8 @@ FMT_INLINE decimal_fp<T> shorter_interval_case(int exponent) noexcept {
|
||||
|
||||
// Otherwise, compute the round-up of y
|
||||
ret_value.significand =
|
||||
cache_accessor<T>::compute_round_up_for_shorter_interval_case(
|
||||
cache, beta_minus_1);
|
||||
cache_accessor<T>::compute_round_up_for_shorter_interval_case(cache,
|
||||
beta);
|
||||
ret_value.exponent = minus_k;
|
||||
|
||||
// When tie occurs, choose one of them according to the rule
|
||||
@ -2040,11 +2020,11 @@ template <typename T> decimal_fp<T> to_decimal(T x) noexcept {
|
||||
// Compute k and beta.
|
||||
const int minus_k = floor_log10_pow2(exponent) - float_info<T>::kappa;
|
||||
const cache_entry_type cache = cache_accessor<T>::get_cached_power(-minus_k);
|
||||
const int beta_minus_1 = exponent + floor_log2_pow10(-minus_k);
|
||||
const int beta = exponent + floor_log2_pow10(-minus_k);
|
||||
|
||||
// Compute zi and deltai.
|
||||
// 10^kappa <= deltai < 10^(kappa + 1)
|
||||
const uint32_t deltai = cache_accessor<T>::compute_delta(cache, beta_minus_1);
|
||||
const uint32_t deltai = cache_accessor<T>::compute_delta(cache, beta);
|
||||
const carrier_uint two_fc = significand << 1;
|
||||
|
||||
// For the case of binary32, the result of integer check is not correct for
|
||||
@ -2058,7 +2038,7 @@ template <typename T> decimal_fp<T> to_decimal(T x) noexcept {
|
||||
// Fortunately, with these inputs, that branch is never executed, so we are
|
||||
// fine.
|
||||
const typename cache_accessor<T>::compute_mul_result z_mul =
|
||||
cache_accessor<T>::compute_mul((two_fc | 1) << beta_minus_1, cache);
|
||||
cache_accessor<T>::compute_mul((two_fc | 1) << beta, cache);
|
||||
|
||||
// Step 2: Try larger divisor; remove trailing zeros if necessary.
|
||||
|
||||
@ -2090,13 +2070,12 @@ template <typename T> decimal_fp<T> to_decimal(T x) noexcept {
|
||||
// Otherwise, the inequalities on exponent ensure that
|
||||
// x is not an integer, so if z^(f) >= delta^(f) (even parity), we in fact
|
||||
// have strict inequality.
|
||||
if (!cache_accessor<T>::compute_mul_parity(two_fl, cache, beta_minus_1)
|
||||
.parity) {
|
||||
if (!cache_accessor<T>::compute_mul_parity(two_fl, cache, beta).parity) {
|
||||
goto small_divisor_case_label;
|
||||
}
|
||||
} else {
|
||||
const typename cache_accessor<T>::compute_mul_parity_result x_mul =
|
||||
cache_accessor<T>::compute_mul_parity(two_fl, cache, beta_minus_1);
|
||||
cache_accessor<T>::compute_mul_parity(two_fl, cache, beta);
|
||||
if (!x_mul.parity && !x_mul.is_integer) {
|
||||
goto small_divisor_case_label;
|
||||
}
|
||||
@ -2133,7 +2112,7 @@ small_divisor_case_label:
|
||||
// parity. Also, zi and r should have the same parity since the divisor
|
||||
// is an even number.
|
||||
const typename cache_accessor<T>::compute_mul_parity_result y_mul =
|
||||
cache_accessor<T>::compute_mul_parity(two_fc, cache, beta_minus_1);
|
||||
cache_accessor<T>::compute_mul_parity(two_fc, cache, beta);
|
||||
|
||||
if (y_mul.parity != approx_y_parity) {
|
||||
--ret_value.significand;
|
||||
|
Loading…
Reference in New Issue
Block a user