glibc/sysdeps/ieee754/dbl-64/uexp.h
Wilco Dijkstra c3d466cba1 Remove slow paths from pow
Remove the slow paths from pow.  Like several other double precision math
functions, pow is exactly rounded.  This is not required from math functions
and causes major overheads as it requires multiple fallbacks using higher
precision arithmetic if a result is close to 0.5ULP.  Ridiculous slowdowns
of up to 100000x have been reported when the highest precision path triggers.

All GLIBC math tests pass on AArch64 and x64 (with ULP of pow set to 1).
The worst case error is ~0.506ULP.  A simple test over a few hundred million
values shows pow is 10% faster on average.  This fixes BZ #13932.

	[BZ #13932]
	* sysdeps/ieee754/dbl-64/uexp.h (err_1): Remove.
	* benchtests/pow-inputs: Update comment for slow path cases.
	* manual/probes.texi (slowpow_p10): Delete removed probe.
	(slowpow_p10): Likewise.
	* math/Makefile: Remove halfulp.c and slowpow.c.
	* sysdeps/aarch64/libm-test-ulps: Set ULP of pow to 1.
	* sysdeps/generic/math_private.h (__exp1): Remove error argument.
	(__halfulp): Remove.
	(__slowpow): Remove.
	* sysdeps/i386/fpu/halfulp.c: Delete file.
	* sysdeps/i386/fpu/slowpow.c: Likewise.
	* sysdeps/ia64/fpu/halfulp.c: Likewise.
	* sysdeps/ia64/fpu/slowpow.c: Likewise.
	* sysdeps/ieee754/dbl-64/e_exp.c (__exp1): Remove error argument,
	improve comments and add error analysis.
	* sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Add error analysis.
	(power1): Remove function:
	(log1): Remove error argument, add error analysis.
	(my_log2): Remove function.
	* sysdeps/ieee754/dbl-64/halfulp.c: Delete file.
	* sysdeps/ieee754/dbl-64/slowpow.c: Likewise.
	* sysdeps/m68k/m680x0/fpu/halfulp.c: Likewise.
	* sysdeps/m68k/m680x0/fpu/slowpow.c: Likewise.
	* sysdeps/powerpc/power4/fpu/Makefile: Remove CPPFLAGS-slowpow.c.
	* sysdeps/x86_64/fpu/libm-test-ulps: Set ULP of pow to 1.
	* sysdeps/x86_64/fpu/multiarch/Makefile: Remove slowpow-fma.c,
	slowpow-fma4.c, halfulp-fma.c, halfulp-fma4.c.
	* sysdeps/x86_64/fpu/multiarch/e_pow-fma.c (__slowpow): Remove define.
	* sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c (__slowpow): Likewise.
	* sysdeps/x86_64/fpu/multiarch/halfulp-fma.c: Delete file.
	* sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c: Likewise.
	* sysdeps/x86_64/fpu/multiarch/slowpow-fma.c: Likewise.
	* sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c: Likewise.
2018-02-12 10:47:09 +00:00

70 lines
3.0 KiB
C

/*
* IBM Accurate Mathematical Library
* Written by International Business Machines Corp.
* Copyright (C) 2001-2018 Free Software Foundation, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/******************************************************************/
/* */
/* MODULE_NAME:uexp.h */
/* */
/* common data and variables prototype and definition */
/******************************************************************/
#ifndef UEXP_H
#define UEXP_H
#include "mydefs.h"
const static double zero = 0.0, hhuge = 1.0e300, tiny = 1.0e-300,
err_0 = 1.000014;
const static int4 bigint = 0x40862002,
badint = 0x40876000,smallint = 0x3C8fffff;
const static int4 hugeint = 0x7FFFFFFF, infint = 0x7ff00000;
#ifdef BIG_ENDI
const static mynumber inf = {{0x7FF00000, 0}}; /* inf */
const static mynumber t256 = {{0x4ff00000, 0}}; /* 2^256 */
const static mynumber ln_two1 = {{0x3FE62E42, 0xFEFA3800}};/*0.69314718055989033 */
const static mynumber ln_two2 = {{0x3D2EF357, 0x93C76730}};/*5.4979230187083712e-14*/
const static mynumber log2e = {{0x3FF71547, 0x652B82FE}};/* 1.4426950408889634 */
const static mynumber p2 = {{0x3FE00000, 0x000004DC}};/* 0.50000000000013811 */
const static mynumber p3 = {{0x3FC55555, 0x55555A0F}};/* 0.16666666666670024 */
const static mynumber three33 = {{0x42180000, 0}}; /* 25769803776 */
const static mynumber three51 = {{0x43380000, 0}}; /* 6755399441055744 */
#else
#ifdef LITTLE_ENDI
const static mynumber inf = {{0, 0x7FF00000}}; /* inf */
const static mynumber t256 = {{0, 0x4ff00000}}; /* 2^256 */
const static mynumber ln_two1 = {{0xFEFA3800, 0x3FE62E42}};/*0.69314718055989033 */
const static mynumber ln_two2 = {{0x93C76730, 0x3D2EF357}};/*5.4979230187083712e-14*/
const static mynumber log2e = {{0x652B82FE, 0x3FF71547}};/* 1.4426950408889634 */
const static mynumber p2 = {{0x000004DC, 0x3FE00000}};/* 0.50000000000013811 */
const static mynumber p3 = {{0x55555A0F, 0x3FC55555}};/* 0.16666666666670024 */
const static mynumber three33 = {{0, 0x42180000}}; /* 25769803776 */
const static mynumber three51 = {{0, 0x43380000}}; /* 6755399441055744 */
#endif
#endif
#endif