Optimize grisu using uint128_t

2019-04-13 13:04:27 -07:00 · 2019-04-13 13:04:27 -07:00 · bade46aae5
commit bade46aae5
parent 41fbaeb3b1
2 changed files with 15 additions and 3 deletions
--- a/include/fmt/format-inl.h
+++ b/include/fmt/format-inl.h
@ -440,6 +440,13 @@ inline fp operator-(fp x, fp y) {
 // with half-up tie breaking, r.e = x.e + y.e + 64. Result may not be
 // normalized.
 FMT_FUNC fp operator*(fp x, fp y) {
+  int exp = x.e + y.e + 64;
+#if FMT_USE_INT128
+  auto product = static_cast<__uint128_t>(x.f) * y.f;
+  auto f = static_cast<uint64_t>(product >> 64);
+  if ((static_cast<uint64_t>(product) & (1ULL << 63)) != 0) ++f;
+  return fp(f, exp);
+#else
  // Multiply 32-bit parts of significands.
  uint64_t mask = (1ULL << 32) - 1;
  uint64_t a = x.f >> 32, b = x.f & mask;
@ -447,7 +454,8 @@ FMT_FUNC fp operator*(fp x, fp y) {
  uint64_t ac = a * c, bc = b * c, ad = a * d, bd = b * d;
  // Compute mid 64-bit of result and round.
  uint64_t mid = (bd >> 32) + (ad & mask) + (bc & mask) + (1U << 31);
-  return fp(ac + (ad >> 32) + (bc >> 32) + (mid >> 32), x.e + y.e + 64);
+  return fp(ac + (ad >> 32) + (bc >> 32) + (mid >> 32), exp);
+#endif
 }

 // Returns cached power (of 10) c_k = c_k.f * pow(2, c_k.e) such that its
--- a/include/fmt/format.h
+++ b/include/fmt/format.h
@ -176,8 +176,12 @@ FMT_END_NAMESPACE
 #  define FMT_USE_TRAILING_RETURN 0
 #endif

-#ifndef FMT_USE_INT128
-# define FMT_USE_INT128 (__SIZEOF_INT128__ != 0)
+#ifdef FMT_USE_INT128
+// Keep an existing FMT_USE_INT128 definition (e.g. one set on the compiler command line).
+#elif defined(__SIZEOF_INT128__)
+#  define FMT_USE_INT128 1
+#else
+#  define FMT_USE_INT128 0
 #endif

 // __builtin_clz is broken in clang with Microsoft CodeGen: