Fix pow overflow in non-default rounding modes (bug 16315).

This patch fixes bug 16315, bad pow handling of overflow/underflow in non-default rounding modes. Tests of pow are duly converted to ALL_RM_TEST to run all tests in all rounding modes. There are two main issues here. First, various implementations compute a negative result by negating a positive result, but this yields inappropriate overflow / underflow values for directed rounding, so either overflow / underflow results need recomputing in the correct sign, or the relevant overflowing / underflowing operation needs to be made to have a result of the correct sign. Second, the dbl-64 implementation sets FE_TONEAREST internally; in the overflow / underflow case, the result needs recomputing in the original rounding mode. Tested x86_64 and x86 and ulps updated accordingly. [BZ #16315] * sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Ensure possibly overflowing or underflowing operations take place with sign of result. * sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Likewise. * sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise. * sysdeps/ieee754/dbl-64/e_pow.c: Include <math.h>. (__ieee754_pow): Recompute overflowing and underflowing results in original rounding mode. * sysdeps/x86/fpu/powl_helper.c: Include <stdbool.h>. (__powl_helper): Allow negative argument X and scale negated value as needed. Avoid passing value outside [-1, 1] to f2xm1. * sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Ensure possibly overflowing or underflowing operations take place with sign of result. * sysdeps/x86_64/fpu/multiarch/e_pow.c [HAVE_FMA4_SUPPORT]: Include <math.h>. * math/auto-libm-test-in: Add more tests of pow. * math/auto-libm-test-out: Regenerated. * math/libm-test.inc (pow_test): Use ALL_RM_TEST. (pow_tonearest_test_data): Remove. (pow_test_tonearest): Likewise. (pow_towardzero_test_data): Likewise. (pow_test_towardzero): Likewise. (pow_downward_test_data): Likewise. (pow_test_downward): Likewise. (pow_upward_test_data): Likewise. (pow_test_upward): Likewise. (main): Don't call removed functions. * sysdeps/i386/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
2024-11-21 20:40:05 +00:00 · 2014-06-23 20:12:33 +00:00 · 2014-06-23 20:12:33 +00:00 · 4da6db5188
commit 4da6db5188
parent 5686b236cc
14 changed files with 2955 additions and 235 deletions
--- a/34
+++ b/34
@ -1,3 +1,37 @@
+2014-06-23  Joseph Myers  <joseph@codesourcery.com>
+
+	[BZ #16315]
+	* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Ensure possibly
+	overflowing or underflowing operations take place with sign of
+	result.
+	* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Likewise.
+	* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
+	* sysdeps/ieee754/dbl-64/e_pow.c: Include <math.h>.
+	(__ieee754_pow): Recompute overflowing and underflowing results in
+	original rounding mode.
+	* sysdeps/x86/fpu/powl_helper.c: Include <stdbool.h>.
+	(__powl_helper): Allow negative argument X and scale negated value
+	as needed.  Avoid passing value outside [-1, 1] to f2xm1.
+	* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Ensure possibly
+	overflowing or underflowing operations take place with sign of
+	result.
+	* sysdeps/x86_64/fpu/multiarch/e_pow.c [HAVE_FMA4_SUPPORT]:
+	Include <math.h>.
+	* math/auto-libm-test-in: Add more tests of pow.
+	* math/auto-libm-test-out: Regenerated.
+	* math/libm-test.inc (pow_test): Use ALL_RM_TEST.
+	(pow_tonearest_test_data): Remove.
+	(pow_test_tonearest): Likewise.
+	(pow_towardzero_test_data): Likewise.
+	(pow_test_towardzero): Likewise.
+	(pow_downward_test_data): Likewise.
+	(pow_test_downward): Likewise.
+	(pow_upward_test_data): Likewise.
+	(pow_test_upward): Likewise.
+	(main): Don't call removed functions.
+	* sysdeps/i386/fpu/libm-test-ulps: Update.
+	* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
+
 2014-06-23  Roland McGrath  <roland@hack.frob.com>

 	* sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/c++-types.data:
--- a/22
+++ b/22
@ -10,17 +10,17 @@ Version 2.20
 * The following bugs are resolved with this release:

  6804, 9894, 12994, 13347, 13651, 14308, 14770, 15119, 15132, 15347, 15514,
-  15698, 15804, 15894, 15946, 16002, 16064, 16095, 16198, 16284, 16348,
-  16349, 16357, 16362, 16447, 16516, 16532, 16545, 16564, 16574, 16599,
-  16600, 16609, 16610, 16611, 16613, 16619, 16623, 16629, 16632, 16634,
-  16639, 16642, 16648, 16649, 16670, 16674, 16677, 16680, 16681, 16683,
-  16689, 16695, 16701, 16706, 16707, 16712, 16713, 16714, 16724, 16731,
-  16739, 16740, 16743, 16754, 16758, 16759, 16760, 16770, 16786, 16789,
-  16791, 16796, 16799, 16800, 16815, 16823, 16824, 16831, 16838, 16849,
-  16854, 16876, 16877, 16878, 16882, 16885, 16888, 16890, 16912, 16915,
-  16916, 16917, 16922, 16927, 16928, 16932, 16943, 16958, 16965, 16966,
-  16967, 16977, 16978, 16984, 16990, 16996, 17009, 17022, 17031, 17042,
-  17048, 17058, 17062, 17069, 17075, 17079.
+  15698, 15804, 15894, 15946, 16002, 16064, 16095, 16198, 16284, 16315,
+  16348, 16349, 16357, 16362, 16447, 16516, 16532, 16545, 16564, 16574,
+  16599, 16600, 16609, 16610, 16611, 16613, 16619, 16623, 16629, 16632,
+  16634, 16639, 16642, 16648, 16649, 16670, 16674, 16677, 16680, 16681,
+  16683, 16689, 16695, 16701, 16706, 16707, 16712, 16713, 16714, 16724,
+  16731, 16739, 16740, 16743, 16754, 16758, 16759, 16760, 16770, 16786,
+  16789, 16791, 16796, 16799, 16800, 16815, 16823, 16824, 16831, 16838,
+  16849, 16854, 16876, 16877, 16878, 16882, 16885, 16888, 16890, 16912,
+  16915, 16916, 16917, 16922, 16927, 16928, 16932, 16943, 16958, 16965,
+  16966, 16967, 16977, 16978, 16984, 16990, 16996, 17009, 17022, 17031,
+  17042, 17048, 17058, 17062, 17069, 17075, 17079.

 * Optimized strchr implementation for AArch64.  Contributed by ARM Ltd.

--- a/math/auto-libm-test-in
+++ b/math/auto-libm-test-in
@ -1613,6 +1613,71 @@ pow -max 0x1.ffffffffffffffffffffffffffffp+112
 pow -max 0x1.ffffffffffffffffffffffffffffp+113
 pow -max max

+pow -0x1p65 2
+pow -0x1p65 3
+pow -0x1p65 4
+pow -0x1p65 5
+pow -0x1p43 3
+pow -0x1p43 4
+pow -0x1p43 5
+pow -0x1p33 4
+pow -0x1p33 5
+pow -0x1p26 5
+pow -0x1p-65 -2
+pow -0x1p-65 -3
+pow -0x1p-65 -4
+pow -0x1p-65 -5
+pow -0x1p-43 -3
+pow -0x1p-43 -4
+pow -0x1p-43 -5
+pow -0x1p-33 -4
+pow -0x1p-33 -5
+pow -0x1p-26 -5
+
+pow -0x1p513 2
+pow -0x1p513 3
+pow -0x1p513 4
+pow -0x1p513 5
+pow -0x1p342 3
+pow -0x1p342 4
+pow -0x1p342 5
+pow -0x1p257 4
+pow -0x1p257 5
+pow -0x1p205 5
+pow -0x1p-513 -2
+pow -0x1p-513 -3
+pow -0x1p-513 -4
+pow -0x1p-513 -5
+pow -0x1p-342 -3
+pow -0x1p-342 -4
+pow -0x1p-342 -5
+pow -0x1p-257 -4
+pow -0x1p-257 -5
+pow -0x1p-205 -5
+
+pow -0x1p8192 2
+pow -0x1p8192 3
+pow -0x1p8192 4
+pow -0x1p8192 5
+pow -0x1p5462 3
+pow -0x1p5462 4
+pow -0x1p5462 5
+pow -0x1p4097 4
+pow -0x1p4097 5
+pow -0x1p3277 5
+pow -0x1p64 257
+pow -0x1p-8192 -2
+pow -0x1p-8192 -3
+pow -0x1p-8192 -4
+pow -0x1p-8192 -5
+pow -0x1p-5462 -3
+pow -0x1p-5462 -4
+pow -0x1p-5462 -5
+pow -0x1p-4097 -4
+pow -0x1p-4097 -5
+pow -0x1p-3277 -5
+pow -0x1p-64 -257
+
 pow -0.5 126
 pow -0.5 127
 pow -0.5 -126
--- a/math/auto-libm-test-out
+++ b/math/auto-libm-test-out
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@ -8697,69 +8697,7 @@ static const struct test_ff_f_data pow_test_data[] =
 static void
 pow_test (void)
 {
-
-  START (pow, 0);
-  RUN_TEST_LOOP_ff_f (pow, pow_test_data, );
-  END;
-}
-
-
-static const struct test_ff_f_data pow_tonearest_test_data[] =
-  {
-    AUTO_TESTS_ff_f (pow),
-  };
-
-static void
-pow_test_tonearest (void)
-{
-  START (pow_tonearest, 0);
-  RUN_TEST_LOOP_ff_f (pow, pow_tonearest_test_data, FE_TONEAREST);
-  END;
-}
-
-
-static const struct test_ff_f_data pow_towardzero_test_data[] =
-  {
-    TEST_ff_f (pow, 1.0625L, 1.125L, 1.070582293028761362162622578677070098674L),
-    TEST_ff_f (pow, 1.5L, 1.03125L, 1.519127098714743184071644334163037684948L),
-  };
-
-static void
-pow_test_towardzero (void)
-{
-  START (pow_towardzero, 0);
-  RUN_TEST_LOOP_ff_f (pow, pow_towardzero_test_data, FE_TOWARDZERO);
-  END;
-}
-
-
-static const struct test_ff_f_data pow_downward_test_data[] =
-  {
-    TEST_ff_f (pow, 1.0625L, 1.125L, 1.070582293028761362162622578677070098674L),
-    TEST_ff_f (pow, 1.5L, 1.03125L, 1.519127098714743184071644334163037684948L),
-  };
-
-static void
-pow_test_downward (void)
-{
-  START (pow_downward, 0);
-  RUN_TEST_LOOP_ff_f (pow, pow_downward_test_data, FE_DOWNWARD);
-  END;
-}
-
-
-static const struct test_ff_f_data pow_upward_test_data[] =
-  {
-    TEST_ff_f (pow, 1.0625L, 1.125L, 1.070582293028761362162622578677070098674L),
-    TEST_ff_f (pow, 1.5L, 1.03125L, 1.519127098714743184071644334163037684948L),
-  };
-
-static void
-pow_test_upward (void)
-{
-  START (pow_upward, 0);
-  RUN_TEST_LOOP_ff_f (pow, pow_upward_test_data, FE_UPWARD);
-  END;
+  ALL_RM_TEST (pow, 0, pow_test_data, RUN_TEST_LOOP_ff_f, END);
 }


@ -9946,10 +9884,6 @@ main (int argc, char **argv)
  fabs_test ();
  hypot_test ();
  pow_test ();
-  pow_test_tonearest ();
-  pow_test_towardzero ();
-  pow_test_downward ();
-  pow_test_upward ();
  sqrt_test ();

  /* Error and gamma functions:  */
--- a/sysdeps/i386/fpu/e_pow.S
+++ b/sysdeps/i386/fpu/e_pow.S
@ -144,12 +144,22 @@ ENTRY(__ieee754_pow)
 4:	fldl	MO(one)		// 1 : x
 	fxch

+	/* If y is even, take the absolute value of x.  Otherwise,
+	   ensure all intermediate values that might overflow have the
+	   sign of x.  */
+	testb	$1, %al
+	jnz	6f
+	fabs
+
 6:	shrdl	$1, %edx, %eax
 	jnc	5f
 	fxch
+	fabs
 	fmul	%st(1)		// x : ST*x
 	fxch
-5:	fmul	%st(0), %st	// x*x : ST*x
+5:	fld	%st		// x : x : ST*x
+	fabs			// |x| : x : ST*x
+	fmulp			// |x|*x : ST*x
 	shrl	$1, %edx
 	movl	%eax, %ecx
 	orl	%edx, %ecx
@ -207,27 +217,28 @@ ENTRY(__ieee754_pow)
 	fxch			// fract(y*log2(x)) : int(y*log2(x))
 	f2xm1			// 2^fract(y*log2(x))-1 : int(y*log2(x))
 	faddl	MO(one)		// 2^fract(y*log2(x)) : int(y*log2(x))
-	fscale			// 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
-	fstp	%st(1)		// 2^fract(y*log2(x))*2^int(y*log2(x))
+
+	// Before scaling, we must negate if x is negative and y is an
+	// odd integer.
 	testb	$2, %dh
-	jz	292f
+	jz	291f
 	// x is negative.  If y is an odd integer, negate the result.
-	fldl	20(%esp)	// y : abs(result)
-	fld	%st		// y : y : abs(result)
-	fabs			// |y| : y : abs(result)
-	fcompl	MO(p63)		// y : abs(result)
+	fldl	20(%esp)	// y : 2^fract(y*log2(x)) : int(y*log2(x))
+	fld	%st		// y : y : 2^fract(y*log2(x)) : int(y*log2(x))
+	fabs			// |y| : y : 2^fract(y*log2(x)) : int(y*log2(x))
+	fcompl	MO(p63)		// y : 2^fract(y*log2(x)) : int(y*log2(x))
 	fnstsw
 	sahf
-	jnc	291f
+	jnc	290f

 	// We must find out whether y is an odd integer.
-	fld	%st		// y : y : abs(result)
-	fistpll	(%esp)		// y : abs(result)
-	fildll	(%esp)		// int(y) : y : abs(result)
-	fucompp			// abs(result)
+	fld	%st		// y : y : 2^fract(y*log2(x)) : int(y*log2(x))
+	fistpll	(%esp)		// y : 2^fract(y*log2(x)) : int(y*log2(x))
+	fildll	(%esp)		// int(y) : y : 2^fract(y*log2(x)) : int(y*log2(x))
+	fucompp			// 2^fract(y*log2(x)) : int(y*log2(x))
 	fnstsw
 	sahf
-	jne	292f
+	jne	291f

 	// OK, the value is an integer, but is it odd?
 	popl	%eax
@ -235,14 +246,17 @@ ENTRY(__ieee754_pow)
 	popl	%edx
 	cfi_adjust_cfa_offset (-4)
 	andb	$1, %al
-	jz	290f		// jump if not odd
+	jz	292f		// jump if not odd
 	// It's an odd integer.
 	fchs
-290:	ret
+	jmp	292f
+
 	cfi_adjust_cfa_offset (8)
-291:	fstp	%st(0)		// abs(result)
-292:	addl	$8, %esp
+290:	fstp	%st(0)		// 2^fract(y*log2(x)) : int(y*log2(x))
+291:	addl	$8, %esp
 	cfi_adjust_cfa_offset (-8)
+292:	fscale			// +/- 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
+	fstp	%st(1)		// +/- 2^fract(y*log2(x))*2^int(y*log2(x))
 	ret


--- a/sysdeps/i386/fpu/e_powf.S
+++ b/sysdeps/i386/fpu/e_powf.S
@ -129,12 +129,22 @@ ENTRY(__ieee754_powf)
 4:	fldl	MO(one)		// 1 : x
 	fxch

+	/* If y is even, take the absolute value of x.  Otherwise,
+	   ensure all intermediate values that might overflow have the
+	   sign of x.  */
+	testb	$1, %dl
+	jnz	6f
+	fabs
+
 6:	shrl	$1, %edx
 	jnc	5f
 	fxch
+	fabs
 	fmul	%st(1)		// x : ST*x
 	fxch
-5:	fmul	%st(0), %st	// x*x : ST*x
+5:	fld	%st		// x : x : ST*x
+	fabs			// |x| : x : ST*x
+	fmulp			// |x|*x : ST*x
 	testl	%edx, %edx
 	jnz	6b
 	fstp	%st(0)		// ST*x
--- a/sysdeps/i386/fpu/e_powl.S
+++ b/sysdeps/i386/fpu/e_powl.S
@ -151,7 +151,7 @@ ENTRY(__ieee754_powl)
 	fcompl	MO(p3)		// y : x
 	fnstsw
 	sahf
-	jnc	2f
+	jnc	3f
 	popl	%eax
 	cfi_adjust_cfa_offset (-4)
 	popl	%edx
@ -166,12 +166,22 @@ ENTRY(__ieee754_powl)
 4:	fldl	MO(one)		// 1 : x
 	fxch

+	/* If y is even, take the absolute value of x.  Otherwise,
+	   ensure all intermediate values that might overflow have the
+	   sign of x.  */
+	testb	$1, %al
+	jnz	6f
+	fabs
+
 6:	shrdl	$1, %edx, %eax
 	jnc	5f
 	fxch
+	fabs
 	fmul	%st(1)		// x : ST*x
 	fxch
-5:	fmul	%st(0), %st	// x*x : ST*x
+5:	fld	%st		// x : x : ST*x
+	fabs			// |x| : x : ST*x
+	fmulp			// |x|*x : ST*x
 	shrl	$1, %edx
 	movl	%eax, %ecx
 	orl	%edx, %ecx
@ -198,79 +208,31 @@ ENTRY(__ieee754_powl)

 	cfi_adjust_cfa_offset (8)
 	.align ALIGNARG(4)
-2:	// y is a large integer (absolute value at least 8), but
-	// may be odd unless at least 1L<<64.  So it may be necessary
-	// to adjust the sign of a negative result afterwards.
-	fxch			// x : y
-	fabs			// |x| : y
-	fxch			// y : |x|
+2:	// y is a large integer (absolute value at least 1L<<63).
 	// If y has absolute value at least 1L<<78, then any finite
 	// nonzero x will result in 0 (underflow), 1 or infinity (overflow).
 	// Saturate y to those bounds to avoid overflow in the calculation
 	// of y*log2(x).
-	fld	%st		// y : y : |x|
-	fabs			// |y| : y : |x|
-	fcompl	MO(p78)		// y : |x|
+	fld	%st		// y : y : x
+	fabs			// |y| : y : x
+	fcompl	MO(p78)		// y : x
 	fnstsw
 	sahf
 	jc	3f
 	fstp	%st(0)		// pop y
-	fldl	MO(p78)		// 1L<<78 : |x|
+	fldl	MO(p78)		// 1L<<78 : x
 	testb	$2, %dl
 	jz	3f		// y > 0
-	fchs			// -(1L<<78) : |x|
+	fchs			// -(1L<<78) : x
 	.align ALIGNARG(4)
 3:	/* y is a real number.  */
 	subl	$28, %esp
 	cfi_adjust_cfa_offset (28)
 	fstpt	12(%esp)	// x
 	fstpt	(%esp)		// <empty>
-	mov	%edx, 24(%esp)
 	call	HIDDEN_JUMPTARGET (__powl_helper)	// <result>
-	mov	24(%esp), %edx
-	addl	$28, %esp
-	cfi_adjust_cfa_offset (-28)
-	testb	$2, %dh
-	jz	292f
-	// x is negative.  If y is an odd integer, negate the result.
-#ifdef	PIC
-	LOAD_PIC_REG (cx)
-#endif
-	fldt	24(%esp)	// y : abs(result)
-	fld	%st		// y : y : abs(result)
-	fabs			// |y| : y : abs(result)
-	fcompl	MO(p64)		// y : abs(result)
-	fnstsw
-	sahf
-	jnc	291f
-	fldl	MO(p63)		// p63 : y : abs(result)
-	fxch			// y : p63 : abs(result)
-	fprem			// y%p63 : p63 : abs(result)
-	fstp	%st(1)		// y%p63 : abs(result)
-
-	// We must find out whether y is an odd integer.
-	fld	%st		// y : y : abs(result)
-	fistpll	(%esp)		// y : abs(result)
-	fildll	(%esp)		// int(y) : y : abs(result)
-	fucompp			// abs(result)
-	fnstsw
-	sahf
-	jne	292f
-
-	// OK, the value is an integer, but is it odd?
-	popl	%eax
-	cfi_adjust_cfa_offset (-4)
-	popl	%edx
-	cfi_adjust_cfa_offset (-4)
-	andb	$1, %al
-	jz	290f		// jump if not odd
-	// It's an odd integer.
-	fchs
-290:	ret
-	cfi_adjust_cfa_offset (8)
-291:	fstp	%st(0)		// abs(result)
-292:	addl	$8, %esp
-	cfi_adjust_cfa_offset (-8)
+	addl	$36, %esp
+	cfi_adjust_cfa_offset (-36)
 	ret

 	// pow(x,±0) = 1
--- a/sysdeps/i386/fpu/libm-test-ulps
+++ b/sysdeps/i386/fpu/libm-test-ulps
@ -1653,6 +1653,8 @@ double: 1
 float: 1
 idouble: 1
 ifloat: 1
+ildouble: 4
+ldouble: 4

 Function: "pow_tonearest":
 ildouble: 1
@ -1663,14 +1665,16 @@ double: 1
 float: 1
 idouble: 1
 ifloat: 1
+ildouble: 1
+ldouble: 1

 Function: "pow_upward":
 double: 1
 float: 1
 idouble: 1
 ifloat: 1
-ildouble: 1
-ldouble: 1
+ildouble: 2
+ldouble: 2

 Function: "sin":
 ildouble: 1
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@ -34,6 +34,7 @@
 /* round to nearest mode of IEEE 754 standard.                             */
 /*                                                                         */
 /***************************************************************************/
+#include <math.h>
 #include "endian.h"
 #include "upow.h"
 #include <dla.h>
@ -91,27 +92,33 @@ __ieee754_pow (double x, double y)
    {				/* if y<-1 or y>1   */
      double retval;

-      SET_RESTORE_ROUND (FE_TONEAREST);
+      {
+	SET_RESTORE_ROUND (FE_TONEAREST);

-      /* Avoid internal underflow for tiny y.  The exact value of y does
-         not matter if |y| <= 2**-64.  */
-      if (ABS (y) < 0x1p-64)
-	y = y < 0 ? -0x1p-64 : 0x1p-64;
-      z = log1 (x, &aa, &error);	/* x^y  =e^(y log (X)) */
-      t = y * CN;
-      y1 = t - (t - y);
-      y2 = y - y1;
-      t = z * CN;
-      a1 = t - (t - z);
-      a2 = (z - a1) + aa;
-      a = y1 * a1;
-      aa = y2 * a1 + y * a2;
-      a1 = a + aa;
-      a2 = (a - a1) + aa;
-      error = error * ABS (y);
-      t = __exp1 (a1, a2, 1.9e16 * error);	/* return -10 or 0 if wasn't computed exactly */
-      retval = (t > 0) ? t : power1 (x, y);
+	/* Avoid internal underflow for tiny y.  The exact value of y does
+	   not matter if |y| <= 2**-64.  */
+	if (ABS (y) < 0x1p-64)
+	  y = y < 0 ? -0x1p-64 : 0x1p-64;
+	z = log1 (x, &aa, &error);	/* x^y  =e^(y log (X)) */
+	t = y * CN;
+	y1 = t - (t - y);
+	y2 = y - y1;
+	t = z * CN;
+	a1 = t - (t - z);
+	a2 = (z - a1) + aa;
+	a = y1 * a1;
+	aa = y2 * a1 + y * a2;
+	a1 = a + aa;
+	a2 = (a - a1) + aa;
+	error = error * ABS (y);
+	t = __exp1 (a1, a2, 1.9e16 * error);	/* return -10 or 0 if wasn't computed exactly */
+	retval = (t > 0) ? t : power1 (x, y);
+      }

+      if (__isinf (retval))
+	retval = huge * huge;
+      else if (retval == 0)
+	retval = tiny * tiny;
      return retval;
    }

@ -164,7 +171,21 @@ __ieee754_pow (double x, double y)
 	    return y < 0 ? 0.0 : INF.x;
 	}
      /* if y even or odd */
-      return (k == 1) ? __ieee754_pow (-x, y) : -__ieee754_pow (-x, y);
+      if (k == 1)
+	return __ieee754_pow (-x, y);
+      else
+	{
+	  double retval;
+	  {
+	    SET_RESTORE_ROUND (FE_TONEAREST);
+	    retval = -__ieee754_pow (-x, y);
+	  }
+	  if (__isinf (retval))
+	    retval = -huge * huge;
+	  else if (retval == 0)
+	    retval = -tiny * tiny;
+	  return retval;
+	}
    }
  /* x>0 */

--- a/sysdeps/x86/fpu/powl_helper.c
+++ b/sysdeps/x86/fpu/powl_helper.c
@ -18,6 +18,7 @@

 #include <math.h>
 #include <math_private.h>
+#include <stdbool.h>

 /* High parts and low parts of -log (k/16), for integer k from 12 to
   24.  */
@ -63,15 +64,32 @@ acc_split (long double *rhi, long double *rlo, long double hi, long double lo,
 extern long double __powl_helper (long double x, long double y);
 libm_hidden_proto (__powl_helper)

-/* Given X a value that is finite and nonzero, or a NaN, and only
-   negative if Y is not an integer, and Y a finite nonzero value with
-   0x1p-79 <= |Y| <= 0x1p78, compute X to the power Y.  */
+/* Given X a value that is finite and nonzero, or a NaN, and Y a
+   finite nonzero value with 0x1p-79 <= |Y| <= 0x1p78, compute X to
+   the power Y.  */

 long double
 __powl_helper (long double x, long double y)
 {
-  if (isnan (x) || x < 0)
+  if (isnan (x))
    return __ieee754_expl (y * __ieee754_logl (x));
+  bool negate;
+  if (x < 0)
+    {
+      long double absy = fabsl (y);
+      if (absy >= 0x1p64L)
+	negate = false;
+      else
+	{
+	  unsigned long long yll = absy;
+	  if (yll != absy)
+	    return __ieee754_expl (y * __ieee754_logl (x));
+	  negate = (yll & 1) != 0;
+	}
+      x = fabsl (x);
+    }
+  else
+    negate = false;

  /* We need to compute Y * log2 (X) to at least 64 bits after the
     point for normal results (that is, to at least 78 bits
@ -199,11 +217,17 @@ __powl_helper (long double x, long double y)
     fractional parts.  */
  long double log2_res_int = __roundl (log2_res_hi);
  long double log2_res_frac = log2_res_hi - log2_res_int + log2_res_lo;
+  /* If the integer part is very large, the computed fractional part
+     may be outside the valid range for f2xm1.  */
+  if (fabsl (log2_res_int) > 16500)
+    log2_res_frac = 0;

  /* Compute the final result.  */
  long double res;
  asm ("f2xm1" : "=t" (res) : "0" (log2_res_frac));
  res += 1.0L;
+  if (negate)
+    res = -res;
  asm ("fscale" : "=t" (res) : "0" (res), "u" (log2_res_int));
  return res;
 }
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@ -141,7 +141,7 @@ ENTRY(__ieee754_powl)
 	fabs			// |y| : 8 : y : x
 	fcomip	%st(1), %st	// 8 : y : x
 	fstp	%st(0)		// y : x
-	jnc	2f
+	jnc	3f
 	mov	-8(%rsp),%eax
 	mov	-4(%rsp),%edx
 	orl	$0, %edx
@ -154,12 +154,22 @@ ENTRY(__ieee754_powl)
 4:	fldl	MO(one)		// 1 : x
 	fxch

+	/* If y is even, take the absolute value of x.  Otherwise,
+	   ensure all intermediate values that might overflow have the
+	   sign of x.  */
+	testb	$1, %al
+	jnz	6f
+	fabs
+
 6:	shrdl	$1, %edx, %eax
 	jnc	5f
 	fxch
+	fabs
 	fmul	%st(1)		// x : ST*x
 	fxch
-5:	fmul	%st(0), %st	// x*x : ST*x
+5:	fld	%st		// x : x : ST*x
+	fabs			// |x| : x : ST*x
+	fmulp			// |x|*x : ST*x
 	shrl	$1, %edx
 	movl	%eax, %ecx
 	orl	%edx, %ecx
@ -177,71 +187,32 @@ ENTRY(__ieee754_powl)
 	ret

 	.align ALIGNARG(4)
-2:	// y is a large integer (absolute value at least 8), but
-	// may be odd unless at least 1L<<64.  So it may be necessary
-	// to adjust the sign of a negative result afterwards.
-	fxch			// x : y
-	fabs			// |x| : y
-	fxch			// y : |x|
+2:	// y is a large integer (absolute value at least 1L<<63).
 	// If y has absolute value at least 1L<<78, then any finite
 	// nonzero x will result in 0 (underflow), 1 or infinity (overflow).
 	// Saturate y to those bounds to avoid overflow in the calculation
 	// of y*log2(x).
-	fldl	MO(p78)		// 1L<<78 : y : |x|
-	fld	%st(1)		// y : 1L<<78 : y : |x|
-	fabs			// |y| : 1L<<78 : y : |x|
-	fcomip	%st(1), %st	// 1L<<78 : y : |x|
-	fstp	%st(0)		// y : |x|
+	fldl	MO(p78)		// 1L<<78 : y : x
+	fld	%st(1)		// y : 1L<<78 : y : x
+	fabs			// |y| : 1L<<78 : y : x
+	fcomip	%st(1), %st	// 1L<<78 : y : x
+	fstp	%st(0)		// y : x
 	jc	3f
 	fstp	%st(0)		// pop y
-	fldl	MO(p78)		// 1L<<78 : |x|
+	fldl	MO(p78)		// 1L<<78 : x
 	testb	$2, %dl
 	jz	3f		// y > 0
-	fchs			// -(1L<<78) : |x|
+	fchs			// -(1L<<78) : x
 	.align ALIGNARG(4)
 3:	/* y is a real number.  */
 	subq	$40, %rsp
 	cfi_adjust_cfa_offset (40)
 	fstpt	16(%rsp)	// x
 	fstpt	(%rsp)		// <empty>
-	mov	%edx, 32(%rsp)
 	call	HIDDEN_JUMPTARGET (__powl_helper)	// <result>
-	mov	32(%rsp), %edx
 	addq	$40, %rsp
 	cfi_adjust_cfa_offset (-40)
-	testb	$2, %dh
-	jz	292f
-	// x is negative.  If y is an odd integer, negate the result.
-	fldt	24(%rsp)	// y : abs(result)
-	fldl	MO(p64)		// 1L<<64 : y : abs(result)
-	fld	%st(1)		// y : 1L<<64 : y : abs(result)
-	fabs			// |y| : 1L<<64 : y : abs(result)
-	fcomip	%st(1), %st	// 1L<<64 : y : abs(result)
-	fstp	%st(0)		// y : abs(result)
-	jnc	291f
-	fldl	MO(p63)		// p63 : y : abs(result)
-	fxch			// y : p63 : abs(result)
-	fprem			// y%p63 : p63 : abs(result)
-	fstp	%st(1)		// y%p63 : abs(result)
-
-	// We must find out whether y is an odd integer.
-	fld	%st		// y : y : abs(result)
-	fistpll	-8(%rsp)	// y : abs(result)
-	fildll	-8(%rsp)	// int(y) : y : abs(result)
-	fucomip	%st(1),%st	// y : abs(result)
-	ffreep	%st		// abs(result)
-	jne	292f
-
-	// OK, the value is an integer, but is it odd?
-	mov	-8(%rsp), %eax
-	mov	-4(%rsp), %edx
-	andb	$1, %al
-	jz	290f		// jump if not odd
-	// It's an odd integer.
-	fchs
-290:	ret
-291:	fstp	%st(0)		// abs(result)
-292:	ret
+	ret

 	// pow(x,±0) = 1
 	.align ALIGNARG(4)
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@ -1736,8 +1736,12 @@ ildouble: 1
 ldouble: 1

 Function: "pow_downward":
+double: 1
 float: 1
+idouble: 1
 ifloat: 1
+ildouble: 4
+ldouble: 4

 Function: "pow_tonearest":
 float: 1
@ -1746,15 +1750,21 @@ ildouble: 1
 ldouble: 1

 Function: "pow_towardzero":
+double: 1
 float: 1
-ifloat: 1
-
-Function: "pow_upward":
-float: 1
+idouble: 1
 ifloat: 1
 ildouble: 1
 ldouble: 1

+Function: "pow_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
 Function: "sin":
 ildouble: 1
 ldouble: 1
--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
@ -1,5 +1,6 @@
 #ifdef HAVE_FMA4_SUPPORT
 # include <init-arch.h>
+# include <math.h>
 # include <math_private.h>

 extern double __ieee754_pow_sse2 (double, double);