mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-23 21:40:12 +00:00
3e08ff544b
Similar algorithm is used as in log: log2(2^k x) = k + log2(c) + log2(x/c) where the last term is approximated by a polynomial of x/c - 1, the first order coefficient is about 1/ln2 in this case. There is separate code path when fma instruction is not available for computing x/c - 1 precisely, for which the table size is doubled. The worst case error is 0.547 ULP (0.55 without fma), the read only global data size is 1168 bytes (2192 without fma) on aarch64. The non-nearest rounding error is less than 1 ULP. Improvements on Cortex-A72 compared to current glibc master: log2 thruput: 2.00x in [0.01 11.1] log2 latency: 2.04x in [0.01 11.1] log2 thruput: 2.17x in [0.999 1.001] log2 latency: 2.88x in [0.999 1.001] Tested on aarch64-linux-gnu (defined __FP_FAST_FMA) arm-linux-gnueabihf (!defined __FP_FAST_FMA) x86_64-linux-gnu (!defined __FP_FAST_FMA) powerpc64le-linxu-gnu (defined __FP_FAST_FMA) targets. * NEWS: Mention log2 improvements. * math/Makefile (type-double-routines): Add e_log2_data. * sysdeps/i386/fpu/e_log2_data.c: New file. * sysdeps/ia64/fpu/e_log2_data.c: New file. * sysdeps/ieee754/dbl-64/e_log2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_log2_data.c: New file. * sysdeps/ieee754/dbl-64/math_config.h (__log2_data): Add. * sysdeps/ieee754/dbl-64/wordsize-64/e_log2.c: Remove. * sysdeps/m68k/m680x0/fpu/e_log2_data.c: New file.
169 lines
4.8 KiB
C
169 lines
4.8 KiB
C
/* Configuration for double precision math routines.
|
|
Copyright (C) 2018 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#ifndef _MATH_CONFIG_H
|
|
#define _MATH_CONFIG_H
|
|
|
|
#include <math.h>
|
|
#include <math_private.h>
|
|
#include <stdint.h>
|
|
|
|
#ifndef WANT_ROUNDING
|
|
/* Correct special case results in non-nearest rounding modes. */
|
|
# define WANT_ROUNDING 1
|
|
#endif
|
|
#ifndef WANT_ERRNO
|
|
/* Set errno according to ISO C with (math_errhandling & MATH_ERRNO) != 0. */
|
|
# define WANT_ERRNO 1
|
|
#endif
|
|
#ifndef WANT_ERRNO_UFLOW
|
|
/* Set errno to ERANGE if result underflows to 0 (in all rounding modes). */
|
|
# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
|
|
#endif
|
|
|
|
#ifndef TOINT_INTRINSICS
|
|
/* When set, the roundtoint and converttoint functions are provided with
|
|
the semantics documented below. */
|
|
# define TOINT_INTRINSICS 0
|
|
#endif
|
|
|
|
#if TOINT_INTRINSICS
|
|
/* Round x to nearest int in all rounding modes, ties have to be rounded
|
|
consistently with converttoint so the results match. If the result
|
|
would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
|
|
static inline double_t
|
|
roundtoint (double_t x);
|
|
|
|
/* Convert x to nearest int in all rounding modes, ties have to be rounded
|
|
consistently with roundtoint. If the result is not representible in an
|
|
int32_t then the semantics is unspecified. */
|
|
static inline int32_t
|
|
converttoint (double_t x);
|
|
#endif
|
|
|
|
static inline uint64_t
|
|
asuint64 (double f)
|
|
{
|
|
union
|
|
{
|
|
double f;
|
|
uint64_t i;
|
|
} u = {f};
|
|
return u.i;
|
|
}
|
|
|
|
static inline double
|
|
asdouble (uint64_t i)
|
|
{
|
|
union
|
|
{
|
|
uint64_t i;
|
|
double f;
|
|
} u = {i};
|
|
return u.f;
|
|
}
|
|
|
|
#define NOINLINE __attribute__ ((noinline))
|
|
|
|
/* Error handling tail calls for special cases, with a sign argument.
|
|
The sign of the return value is set if the argument is non-zero. */
|
|
|
|
/* The result overflows. */
|
|
attribute_hidden double __math_oflow (uint32_t);
|
|
/* The result underflows to 0 in nearest rounding mode. */
|
|
attribute_hidden double __math_uflow (uint32_t);
|
|
/* The result underflows to 0 in some directed rounding mode only. */
|
|
attribute_hidden double __math_may_uflow (uint32_t);
|
|
/* Division by zero. */
|
|
attribute_hidden double __math_divzero (uint32_t);
|
|
|
|
/* Error handling using input checking. */
|
|
|
|
/* Invalid input unless it is a quiet NaN. */
|
|
attribute_hidden double __math_invalid (double);
|
|
|
|
/* Error handling using output checking, only for errno setting. */
|
|
|
|
/* Check if the result overflowed to infinity. */
|
|
attribute_hidden double __math_check_oflow (double);
|
|
/* Check if the result underflowed to 0. */
|
|
attribute_hidden double __math_check_uflow (double);
|
|
|
|
/* Check if the result overflowed to infinity. */
|
|
static inline double
|
|
check_oflow (double x)
|
|
{
|
|
return WANT_ERRNO ? __math_check_oflow (x) : x;
|
|
}
|
|
|
|
/* Check if the result underflowed to 0. */
|
|
static inline double
|
|
check_uflow (double x)
|
|
{
|
|
return WANT_ERRNO ? __math_check_uflow (x) : x;
|
|
}
|
|
|
|
#define EXP_TABLE_BITS 7
|
|
#define EXP_POLY_ORDER 5
|
|
#define EXP2_POLY_ORDER 5
|
|
extern const struct exp_data
|
|
{
|
|
double invln2N;
|
|
double shift;
|
|
double negln2hiN;
|
|
double negln2loN;
|
|
double poly[4]; /* Last four coefficients. */
|
|
double exp2_shift;
|
|
double exp2_poly[EXP2_POLY_ORDER];
|
|
uint64_t tab[2*(1 << EXP_TABLE_BITS)];
|
|
} __exp_data attribute_hidden;
|
|
|
|
#define LOG_TABLE_BITS 7
|
|
#define LOG_POLY_ORDER 6
|
|
#define LOG_POLY1_ORDER 12
|
|
extern const struct log_data
|
|
{
|
|
double ln2hi;
|
|
double ln2lo;
|
|
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
|
double poly1[LOG_POLY1_ORDER - 1];
|
|
/* See e_log_data.c for details. */
|
|
struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
|
|
#ifndef __FP_FAST_FMA
|
|
struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
|
|
#endif
|
|
} __log_data attribute_hidden;
|
|
|
|
#define LOG2_TABLE_BITS 6
|
|
#define LOG2_POLY_ORDER 7
|
|
#define LOG2_POLY1_ORDER 11
|
|
extern const struct log2_data
|
|
{
|
|
double invln2hi;
|
|
double invln2lo;
|
|
double poly[LOG2_POLY_ORDER - 1];
|
|
double poly1[LOG2_POLY1_ORDER - 1];
|
|
/* See e_log2_data.c for details. */
|
|
struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
|
|
#ifndef __FP_FAST_FMA
|
|
struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
|
|
#endif
|
|
} __log2_data attribute_hidden;
|
|
|
|
#endif
|