Add partial support for extended precision FP

This commit is contained in:
Victor Zverovich 2022-02-07 06:44:42 -08:00
parent 0a24a0714e
commit 2b6f7fc7a3
2 changed files with 38 additions and 23 deletions

View File

@ -245,8 +245,9 @@ struct fp {
template <typename Float> explicit FMT_CONSTEXPR fp(Float n) { assign(n); } template <typename Float> explicit FMT_CONSTEXPR fp(Float n) { assign(n); }
template <typename Float> template <typename Float>
using is_supported = bool_constant<sizeof(Float) == sizeof(uint64_t) || using is_supported = bool_constant<sizeof(Float) == sizeof(uint32_t) ||
sizeof(Float) == sizeof(uint32_t)>; sizeof(Float) == 2 * sizeof(uint32_t) ||
std::numeric_limits<Float>::digits == 64>;
// Assigns n to this and returns true iff predecessor is closer than successor. // Assigns n to this and returns true iff predecessor is closer than successor.
template <typename Float, FMT_ENABLE_IF(is_supported<Float>::value)> template <typename Float, FMT_ENABLE_IF(is_supported<Float>::value)>
@ -255,13 +256,13 @@ struct fp {
const int num_float_significand_bits = const int num_float_significand_bits =
detail::num_significand_bits<Float>(); detail::num_significand_bits<Float>();
const uint64_t implicit_bit = 1ULL << num_float_significand_bits; const uint64_t implicit_bit = 1ULL << num_float_significand_bits;
const uint64_t significand_mask = implicit_bit - 1; using carrier_uint = typename dragonbox::float_info<Float>::carrier_uint;
constexpr bool is_double = sizeof(Float) == sizeof(uint64_t); const carrier_uint significand_mask = implicit_bit - 1;
auto u = bit_cast<conditional_t<is_double, uint64_t, uint32_t>>(n); auto u = bit_cast<carrier_uint>(n);
f = u & significand_mask; f = u & significand_mask;
const uint64_t exponent_mask = (~0ULL >> 1) & ~significand_mask;
int biased_e = int biased_e =
static_cast<int>((u & exponent_mask) >> num_float_significand_bits); static_cast<int>((u & exponent_mask<Float>()) >>
dragonbox::float_info<Float>::significand_bits);
// The predecessor is closer if n is a normalized power of 2 (f == 0) other // The predecessor is closer if n is a normalized power of 2 (f == 0) other
// than the smallest normalized number (biased_e > 1). // than the smallest normalized number (biased_e > 1).
bool is_predecessor_closer = f == 0 && biased_e > 1; bool is_predecessor_closer = f == 0 && biased_e > 1;
@ -2120,10 +2121,9 @@ FMT_CONSTEXPR20 inline void format_dragon(fp value, bool is_predecessor_closer,
// is closer) to make lower and upper integers. This eliminates multiplication // is closer) to make lower and upper integers. This eliminates multiplication
// by 2 during later computations. // by 2 during later computations.
int shift = is_predecessor_closer ? 2 : 1; int shift = is_predecessor_closer ? 2 : 1;
uint64_t significand = value.f << shift;
if (value.e >= 0) { if (value.e >= 0) {
numerator.assign(significand); numerator.assign(value.f);
numerator <<= value.e; numerator <<= value.e + shift;
lower.assign(1); lower.assign(1);
lower <<= value.e; lower <<= value.e;
if (shift != 1) { if (shift != 1) {
@ -2141,11 +2141,13 @@ FMT_CONSTEXPR20 inline void format_dragon(fp value, bool is_predecessor_closer,
upper_store <<= 1; upper_store <<= 1;
upper = &upper_store; upper = &upper_store;
} }
numerator *= significand; numerator *= value.f;
numerator <<= shift;
denominator.assign(1); denominator.assign(1);
denominator <<= shift - value.e; denominator <<= shift - value.e;
} else { } else {
numerator.assign(significand); numerator.assign(value.f);
numerator <<= shift;
denominator.assign_pow10(exp10); denominator.assign_pow10(exp10);
denominator <<= shift - value.e; denominator <<= shift - value.e;
lower.assign(1); lower.assign(1);
@ -2261,7 +2263,7 @@ FMT_HEADER_ONLY_CONSTEXPR20 int format_float(Float value, int precision,
// https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf. // https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf.
const int min_exp = -60; // alpha in Grisu. const int min_exp = -60; // alpha in Grisu.
int cached_exp10 = 0; // K in Grisu. int cached_exp10 = 0; // K in Grisu.
fp normalized = normalize(fp(value)); fp normalized = normalize(fp(convert_float(value)));
const auto cached_pow = get_cached_power( const auto cached_pow = get_cached_power(
min_exp - (normalized.e + fp::num_significand_bits), cached_exp10); min_exp - (normalized.e + fp::num_significand_bits), cached_exp10);
normalized = normalized * cached_pow; normalized = normalized * cached_pow;
@ -2278,8 +2280,9 @@ FMT_HEADER_ONLY_CONSTEXPR20 int format_float(Float value, int precision,
} }
if (use_dragon) { if (use_dragon) {
auto f = fp(); auto f = fp();
bool is_predecessor_closer = bool is_predecessor_closer = specs.binary32
specs.binary32 ? f.assign(static_cast<float>(value)) : f.assign(value); ? f.assign(static_cast<float>(value))
: f.assign(convert_float(value));
// Limit precision to the maximum possible number of significant digits in // Limit precision to the maximum possible number of significant digits in
// an IEEE754 double because we don't need to generate zeros. // an IEEE754 double because we don't need to generate zeros.
const int max_double_digits = 767; const int max_double_digits = 767;

View File

@ -1198,7 +1198,7 @@ class utf8_to_utf16 {
namespace dragonbox { namespace dragonbox {
// Type-specific information that Dragonbox uses. // Type-specific information that Dragonbox uses.
template <class T> struct float_info; template <typename T, typename Enable = void> struct float_info;
template <> struct float_info<float> { template <> struct float_info<float> {
using carrier_uint = uint32_t; using carrier_uint = uint32_t;
@ -1246,6 +1246,15 @@ template <> struct float_info<double> {
static const int max_trailing_zeros = 16; static const int max_trailing_zeros = 16;
}; };
// Type information for 80-bit extended precision long double.
//
// This partial specialization participates only when T is exactly
// `long double` and std::numeric_limits<T>::digits == 64; for any other
// long double representation the enable_if_t condition removes it from
// consideration.
//
// Only the three members below are provided. Unlike float_info<double>, which
// also declares members such as max_trailing_zeros, no further constants are
// defined here.
// NOTE(review): presumably this is the "partial support" the commit title
// refers to, i.e. enough for the fp-based fallback path but not for the
// Dragonbox algorithm itself -- confirm against the callers.
template <typename T>
struct float_info<T, enable_if_t<std::is_same<T, long double>::value &&
std::numeric_limits<T>::digits == 64>> {
// Unsigned integer type that fp::assign bit_casts the value into.
// NOTE(review): assumes sizeof(long double) equals the size of uint128_t on
// the targets where this specialization is enabled -- confirm.
using carrier_uint = detail::uint128_t;
// Number of significand bits; equals the digits == 64 condition above.
// fp::assign shifts the masked exponent field right by this amount.
// NOTE(review): presumably includes the explicitly stored leading bit of the
// 80-bit format, unlike an implicit-bit layout -- verify users account for it.
static const int significand_bits = 64;
// Width of the exponent field (64 significand + 15 exponent + 1 sign = 80).
static const int exponent_bits = 15;
};
template <typename T> struct decimal_fp { template <typename T> struct decimal_fp {
using significand_type = typename float_info<T>::carrier_uint; using significand_type = typename float_info<T>::carrier_uint;
significand_type significand; significand_type significand;
@ -1295,11 +1304,14 @@ template <typename T>
auto snprintf_float(T value, int precision, float_specs specs, auto snprintf_float(T value, int precision, float_specs specs,
buffer<char>& buf) -> int; buffer<char>& buf) -> int;
template <typename T> constexpr auto promote_float(T value) -> T { template <typename T>
return value; using convert_float_result =
} conditional_t<std::is_same<T, float>::value || sizeof(T) == sizeof(double),
constexpr auto promote_float(float value) -> double { double, T>;
return static_cast<double>(value);
template <typename T>
constexpr auto convert_float(T value) -> convert_float_result<T> {
return static_cast<convert_float_result<T>>(value);
} }
template <typename OutputIt, typename Char> template <typename OutputIt, typename Char>
@ -2207,7 +2219,7 @@ FMT_CONSTEXPR20 auto write(OutputIt out, T value,
memory_buffer buffer; memory_buffer buffer;
if (fspecs.format == float_format::hex) { if (fspecs.format == float_format::hex) {
if (fspecs.sign) buffer.push_back(detail::sign<char>(fspecs.sign)); if (fspecs.sign) buffer.push_back(detail::sign<char>(fspecs.sign));
snprintf_float(promote_float(value), specs.precision, fspecs, buffer); snprintf_float(convert_float(value), specs.precision, fspecs, buffer);
return write_bytes<align::right>(out, {buffer.data(), buffer.size()}, return write_bytes<align::right>(out, {buffer.data(), buffer.size()},
specs); specs);
} }
@ -2222,7 +2234,7 @@ FMT_CONSTEXPR20 auto write(OutputIt out, T value,
} }
if (const_check(std::is_same<T, float>())) fspecs.binary32 = true; if (const_check(std::is_same<T, float>())) fspecs.binary32 = true;
if (!is_fast_float<T>()) fspecs.fallback = true; if (!is_fast_float<T>()) fspecs.fallback = true;
int exp = format_float(promote_float(value), precision, fspecs, buffer); int exp = format_float(convert_float(value), precision, fspecs, buffer);
fspecs.precision = precision; fspecs.precision = precision;
auto fp = big_decimal_fp{buffer.data(), static_cast<int>(buffer.size()), exp}; auto fp = big_decimal_fp{buffer.data(), static_cast<int>(buffer.size()), exp};
return write_float(out, fp, specs, fspecs, loc); return write_float(out, fp, specs, fspecs, loc);