math: Add new exp10 implementation

New implementation is based on the existing exp/exp2, with different reduction constants and polynomial. Worst-case error in round-to- nearest is 0.513 ULP. The exp/exp2 shared table is reused for exp10 - .rodata size of e_exp_data increases by 64 bytes. As for exp/exp2, targets with single-instruction rounding/conversion intrinsics can use them by toggling TOINT_INTRINSICS=1 and adding the necessary code to their math_private.h. Improvements on Neoverse V1 compared to current GLIBC master: exp10 thruput: 3.3x in [-0x1.439b746e36b52p+8 0x1.34413509f79ffp+8] exp10 latency: 1.8x in [-0x1.439b746e36b52p+8 0x1.34413509f79ffp+8] Tested on: aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
2024-11-24 05:50:14 +00:00 · 2023-12-01 09:49:45 +00:00 · 2023-12-01 09:49:45 +00:00 · 63d0a35d5f
commit 63d0a35d5f
parent 8e755f5bc8
3 changed files with 134 additions and 23 deletions
--- a/sysdeps/ieee754/dbl-64/e_exp10.c
+++ b/sysdeps/ieee754/dbl-64/e_exp10.c
@ -16,36 +16,132 @@
   <https://www.gnu.org/licenses/>.  */

 #include <math.h>
+#include <math-barriers.h>
+#include <math-narrow-eval.h>
 #include <math_private.h>
 #include <float.h>
 #include <libm-alias-finite.h>
+#include "math_config.h"

-static const double log10_high = 0x2.4d7637p0;
-static const double log10_low = 0x7.6aaa2b05ba95cp-28;
+#define N (1 << EXP_TABLE_BITS)
+#define IndexMask (N - 1)
+#define OFlowBound 0x1.34413509f79ffp8 /* log10(DBL_MAX).  */
+#define UFlowBound -0x1.5ep+8 /* -350.  */
+#define SmallTop 0x3c6 /* top12(0x1p-57).  */
+#define BigTop 0x407   /* top12(0x1p8).  */
+#define Thresh 0x41    /* BigTop - SmallTop.  */
+#define Shift __exp_data.shift
+#define C(i) __exp_data.exp10_poly[i]

-double
-__ieee754_exp10 (double arg)
+static double
+special_case (uint64_t sbits, double_t tmp, uint64_t ki)
 {
-  int32_t lx;
-  double arg_high, arg_low;
-  double exp_high, exp_low;
+  double_t scale, y;

-  if (!isfinite (arg))
-    return __ieee754_exp (arg);
-  if (arg < DBL_MIN_10_EXP - DBL_DIG - 10)
-    return DBL_MIN * DBL_MIN;
-  else if (arg > DBL_MAX_10_EXP + 1)
-    return DBL_MAX * DBL_MAX;
-  else if (fabs (arg) < 0x1p-56)
-    return 1.0;
+  if (ki - (1ull << 16) < 0x80000000)
+    {
+      /* The exponent of scale might have overflowed by 1.  */
+      sbits -= 1ull << 52;
+      scale = asdouble (sbits);
+      y = 2 * (scale + scale * tmp);
+      return check_oflow (y);
+    }

-  GET_LOW_WORD (lx, arg);
-  lx &= 0xf8000000;
-  arg_high = arg;
-  SET_LOW_WORD (arg_high, lx);
-  arg_low = arg - arg_high;
-  exp_high = arg_high * log10_high;
-  exp_low = arg_high * log10_low + arg_low * M_LN10;
-  return __ieee754_exp (exp_high) * __ieee754_exp (exp_low);
+  /* n < 0, need special care in the subnormal range.  */
+  sbits += 1022ull << 52;
+  scale = asdouble (sbits);
+  y = scale + scale * tmp;
+
+  if (y < 1.0)
+    {
+      /* Round y to the right precision before scaling it into the subnormal
+	 range to avoid double rounding that can cause 0.5+E/2 ulp error where
+	 E is the worst-case ulp error outside the subnormal range.  So this
+	 is only useful if the goal is better than 1 ulp worst-case error.  */
+      double_t lo = scale - y + scale * tmp;
+      double_t hi = 1.0 + y;
+      lo = 1.0 - hi + y + lo;
+      y = math_narrow_eval (hi + lo) - 1.0;
+      /* Avoid -0.0 with downward rounding.  */
+      if (WANT_ROUNDING && y == 0.0)
+	y = 0.0;
+      /* The underflow exception needs to be signaled explicitly.  */
+      math_force_eval (math_opt_barrier (0x1p-1022) * 0x1p-1022);
+    }
+  y = 0x1p-1022 * y;
+
+  return check_uflow (y);
 }
+
+/* Double-precision 10^x approximation. Largest observed error is ~0.513 ULP.  */
+double
+__ieee754_exp10 (double x)
+{
+  uint64_t ix = asuint64 (x);
+  uint32_t abstop = (ix >> 52) & 0x7ff;
+
+  if (__glibc_unlikely (abstop - SmallTop >= Thresh))
+    {
+      if (abstop - SmallTop >= 0x80000000)
+	/* Avoid spurious underflow for tiny x.
+	   Note: 0 is common input.  */
+	return x + 1;
+      if (abstop == 0x7ff)
+	return ix == asuint64 (-INFINITY) ? 0.0 : x + 1.0;
+      if (x >= OFlowBound)
+	return __math_oflow (0);
+      if (x < UFlowBound)
+	return __math_uflow (0);
+
+      /* Large x is special-cased below.  */
+      abstop = 0;
+    }
+
+  /* Reduce x: z = x * N / log10(2), k = round(z).  */
+  double_t z = __exp_data.invlog10_2N * x;
+  double_t kd;
+  int64_t ki;
+#if TOINT_INTRINSICS
+  kd = roundtoint (z);
+  ki = converttoint (z);
+#else
+  kd = math_narrow_eval (z + Shift);
+  kd -= Shift;
+  ki = kd;
+#endif
+
+  /* r = x - k * log10(2), r in [-0.5, 0.5].  */
+  double_t r = x;
+  r = __exp_data.neglog10_2hiN * kd + r;
+  r = __exp_data.neglog10_2loN * kd + r;
+
+  /* exp10(x) = 2^(k/N) * 2^(r/N).
+     Approximate the two components separately.  */
+
+  /* s = 2^(k/N), using lookup table.  */
+  uint64_t e = ki << (52 - EXP_TABLE_BITS);
+  uint64_t i = (ki & IndexMask) * 2;
+  uint64_t u = __exp_data.tab[i + 1];
+  uint64_t sbits = u + e;
+
+  double_t tail = asdouble (__exp_data.tab[i]);
+
+  /* 2^(r/N) ~= 1 + r * Poly(r).  */
+  double_t r2 = r * r;
+  double_t p = C (0) + r * C (1);
+  double_t y = C (2) + r * C (3);
+  y = y + r2 * C (4);
+  y = p + r2 * y;
+  y = tail + y * r;
+
+  if (__glibc_unlikely (abstop == 0))
+    return special_case (sbits, y, ki);
+
+  /* Assemble components:
+     y  = 2^(r/N) * 2^(k/N)
+       ~= (y + 1) * s.  */
+  double_t s = asdouble (sbits);
+  return s * y + s;
+}
+
 libm_alias_finite (__ieee754_exp10, __exp10)
--- a/sysdeps/ieee754/dbl-64/e_exp_data.c
+++ b/sysdeps/ieee754/dbl-64/e_exp_data.c
@ -58,6 +58,17 @@ const struct exp_data __exp_data = {
 0x1.5d7e09b4e3a84p-10,
 #endif
 },
+.invlog10_2N = 0x1.a934f0979a371p1 * N,
+.neglog10_2hiN = -0x1.3441350ap-2 / N,
+.neglog10_2loN = 0x1.0c0219dc1da99p-39 / N,
+.exp10_poly = {
+/* Coeffs generated using Remez in [-log10(2)/256, log10(2)/256 ].  */
+0x1.26bb1bbb55516p1,
+0x1.53524c73ce9fep1,
+0x1.0470591ce4b26p1,
+0x1.2bd76577fe684p0,
+0x1.1446eeccd0efbp-1
+},
 // 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
 // tab[2*k] = asuint64(T[k])
 // tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
--- a/sysdeps/ieee754/dbl-64/math_config.h
+++ b/sysdeps/ieee754/dbl-64/math_config.h
@ -201,6 +201,10 @@ extern const struct exp_data
  double poly[4]; /* Last four coefficients.  */
  double exp2_shift;
  double exp2_poly[EXP2_POLY_ORDER];
+  double invlog10_2N;
+  double neglog10_2hiN;
+  double neglog10_2loN;
+  double exp10_poly[5];
  uint64_t tab[2*(1 << EXP_TABLE_BITS)];
 } __exp_data attribute_hidden;