Fix spurious underflows from pow with results close to 1 (bug 14811).

2024-09-20 00:19:57 +00:00 · 2012-11-07 13:03:31 +00:00 · 2012-11-07 13:03:31 +00:00 · 60e235ee2a
commit 60e235ee2a
parent 0ab234b7db
7 changed files with 96 additions and 5 deletions
--- a/16
+++ b/16
@ -1,3 +1,19 @@
+2012-11-07  Joseph Myers  <joseph@codesourcery.com>
+
+	[BZ #14811]
+	* sysdeps/i386/fpu/e_powl.S (pm79): New object.
+	(__ieee754_powl): Saturate nonzero exponents with absolute value
+	below 0x1p-79 to +/- 0x1p-79.
+	* sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Saturate nonzero
+	exponents with absolute value below 0x1p-64 to +/- 0x1p-64.
+	* sysdeps/ieee754/flt-32/e_powf.c (__ieee754_powf): Saturate
+	nonzero exponents with absolute value below 0x1p-32 to +/-
+	0x1p-32.
+	* sysdeps/x86_64/fpu/e_powl.S (pm79): New object.
+	(__ieee754_powl): Saturate nonzero exponents with absolute value
+	below 0x1p-79 to +/- 0x1p-79.
+	* math/libm-test.inc (pow_test): Add more tests.
+
 2012-11-07  Andreas Krebbel  <Andreas.Krebbel@de.ibm.com>

 	* sysdeps/s390/dl-procinfo.c (_dl_s390_cap_flags): Sync
--- a/2
+++ b/2
@ -18,7 +18,7 @@ Version 2.17
  14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545, 14557, 14562,
  14568, 14576, 14579, 14583, 14587, 14595, 14602, 14610, 14621, 14638,
  14645, 14648, 14652, 14660, 14661, 14669, 14683, 14694, 14716, 14743,
-  14767, 14783, 14784, 14785, 14793, 14796, 14797, 14801, 14805.
+  14767, 14783, 14784, 14785, 14793, 14796, 14797, 14801, 14805, 14811.

 * Support for STT_GNU_IFUNC symbols added for s390 and s390x.
  Optimized versions of memcpy, memset, and memcmp added for System z10 and
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@ -7743,10 +7743,12 @@ pow_test (void)
  TEST_ff_f (pow, plus_infty, 1e-7L, plus_infty);
  TEST_ff_f (pow, plus_infty, 1, plus_infty);
  TEST_ff_f (pow, plus_infty, 1e7L, plus_infty);
+  TEST_ff_f (pow, plus_infty, min_subnorm_value, plus_infty);

  TEST_ff_f (pow, plus_infty, -1e-7L, 0);
  TEST_ff_f (pow, plus_infty, -1, 0);
  TEST_ff_f (pow, plus_infty, -1e7L, 0);
+  TEST_ff_f (pow, plus_infty, -min_subnorm_value, 0);

  TEST_ff_f (pow, minus_infty, 1, minus_infty);
  TEST_ff_f (pow, minus_infty, 11, minus_infty);
@ -7759,6 +7761,7 @@ pow_test (void)
  TEST_ff_f (pow, minus_infty, 1.1L, plus_infty);
  TEST_ff_f (pow, minus_infty, 11.1L, plus_infty);
  TEST_ff_f (pow, minus_infty, 1001.1L, plus_infty);
+  TEST_ff_f (pow, minus_infty, min_subnorm_value, plus_infty);

  TEST_ff_f (pow, minus_infty, -1, minus_zero);
  TEST_ff_f (pow, minus_infty, -11, minus_zero);
@ -7771,6 +7774,7 @@ pow_test (void)
  TEST_ff_f (pow, minus_infty, -1.1L, 0);
  TEST_ff_f (pow, minus_infty, -11.1L, 0);
  TEST_ff_f (pow, minus_infty, -1001.1L, 0);
+  TEST_ff_f (pow, minus_infty, -min_subnorm_value, 0);
 #endif

  TEST_ff_f (pow, nan_value, nan_value, nan_value);
@ -7793,6 +7797,8 @@ pow_test (void)
  TEST_ff_f (pow, nan_value, minus_infty, nan_value);
  TEST_ff_f (pow, nan_value, 2.5, nan_value);
  TEST_ff_f (pow, nan_value, -2.5, nan_value);
+  TEST_ff_f (pow, nan_value, min_subnorm_value, nan_value);
+  TEST_ff_f (pow, nan_value, -min_subnorm_value, nan_value);

  TEST_ff_f (pow, 1, plus_infty, 1);
  TEST_ff_f (pow, -1, plus_infty, 1);
@ -7806,6 +7812,8 @@ pow_test (void)
  TEST_ff_f (pow, 1, 0x1p63L, 1);
  TEST_ff_f (pow, 1, 0x1p64L, 1);
  TEST_ff_f (pow, 1, 0x1p72L, 1);
+  TEST_ff_f (pow, 1, min_subnorm_value, 1);
+  TEST_ff_f (pow, 1, -min_subnorm_value, 1);

  /* pow (x, +-0) == 1.  */
  TEST_ff_f (pow, plus_infty, 0, 1);
@ -7825,6 +7833,10 @@ pow_test (void)
  TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION);
  TEST_ff_f (pow, -10.1L, 1.1L, nan_value, INVALID_EXCEPTION);
  TEST_ff_f (pow, -10.1L, -1.1L, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.01L, min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.01L, -min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.0L, min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.0L, -min_subnorm_value, nan_value, INVALID_EXCEPTION);

  errno = 0;
  TEST_ff_f (pow, 0, -1, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
@ -7910,6 +7922,9 @@ pow_test (void)
  TEST_ff_f (pow, 0, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
  check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
  errno = 0;
+  TEST_ff_f (pow, 0, -min_subnorm_value, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+  check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
+  errno = 0;
  TEST_ff_f (pow, 0, -0x1p24, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
  check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
  errno = 0;
@ -7925,6 +7940,9 @@ pow_test (void)
  TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
  check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
  errno = 0;
+  TEST_ff_f (pow, minus_zero, -min_subnorm_value, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+  check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
+  errno = 0;
  TEST_ff_f (pow, minus_zero, -0x1p24, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
  check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
  errno = 0;
@ -8114,12 +8132,14 @@ pow_test (void)
  TEST_ff_f (pow, 0.0, 0x1p24, 0.0);
  TEST_ff_f (pow, 0.0, 0x1p127, 0.0);
  TEST_ff_f (pow, 0.0, max_value, 0.0);
+  TEST_ff_f (pow, 0.0, min_subnorm_value, 0.0);

  /* pow (-0, y) == +0 for y > 0 and not an odd integer.  */
  TEST_ff_f (pow, minus_zero, 4, 0.0);
  TEST_ff_f (pow, minus_zero, 0x1p24, 0.0);
  TEST_ff_f (pow, minus_zero, 0x1p127, 0.0);
  TEST_ff_f (pow, minus_zero, max_value, 0.0);
+  TEST_ff_f (pow, minus_zero, min_subnorm_value, 0.0);

  TEST_ff_f (pow, 16, 0.25L, 2);
  TEST_ff_f (pow, 0x1p64L, 0.125L, 256);
@ -8407,6 +8427,15 @@ pow_test (void)
  TEST_ff_f (pow, 0x1.0000000000001p0L, -0x1.23456789abcdfp61L, 1.0118762747828234466621210689458255908670e-253L);
 #endif

+  TEST_ff_f (pow, min_subnorm_value, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, min_subnorm_value, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, max_value, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, max_value, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 0.99L, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 0.99L, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 1.01L, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 1.01L, -min_subnorm_value, 1.0L);
+
  TEST_ff_f (pow, 2.0L, -100000.0L, plus_zero, UNDERFLOW_EXCEPTION);

  END (pow);
--- a/sysdeps/i386/fpu/e_powl.S
+++ b/sysdeps/i386/fpu/e_powl.S
@ -38,6 +38,9 @@ p64:	.byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
 	.type p78,@object
 p78:	.byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44
 	ASM_SIZE_DIRECTIVE(p78)
+	.type pm79,@object
+pm79:	.byte 0, 0, 0, 0, 0, 0, 0, 0x3b
+	ASM_SIZE_DIRECTIVE(pm79)

 	.section .rodata.cst16,"aM",@progbits,16

@ -120,9 +123,25 @@ ENTRY(__ieee754_powl)
 	fucomp	%st(1)		// y : x
 	fnstsw
 	sahf
-	jne	3f
+	je	9f

-	/* OK, we have an integer value for y.  */
+	// If y has absolute value at most 0x1p-79, then any finite
+	// nonzero x will result in 1.  Saturate y to those bounds to
+	// avoid underflow in the calculation of y*log2(x).
+	fld	%st		// y : y : x
+	fabs			// |y| : y : x
+	fcompl	MO(pm79)	// y : x
+	fnstsw
+	sahf
+	jnc	3f
+	fstp	%st(0)		// pop y
+	fldl	MO(pm79)	// 0x1p-79 : x
+	testb	$2, %dl
+	jnz	3f		// y > 0
+	fchs			// -0x1p-79 : x
+	jmp	3f
+
+9:	/* OK, we have an integer value for y.  */
 	popl	%eax
 	cfi_adjust_cfa_offset (-4)
 	popl	%edx
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@ -90,6 +90,10 @@ __ieee754_pow(double x, double y) {

    SET_RESTORE_ROUND (FE_TONEAREST);

+    /* Avoid internal underflow for tiny y.  The exact value of y does
+       not matter if |y| <= 2**-64.  */
+    if (ABS (y) < 0x1p-64)
+      y = y < 0 ? -0x1p-64 : 0x1p-64;
    z = log1(x,&aa,&error);                                 /* x^y  =e^(y log (X)) */
    t = y*134217729.0;
    y1 = t - (t-y);
--- a/sysdeps/ieee754/flt-32/e_powf.c
+++ b/sysdeps/ieee754/flt-32/e_powf.c
@ -141,6 +141,10 @@ __ieee754_powf(float x, float y)
 	    t2 = v-(t1-u);
 	} else {
 	    float s2,s_h,s_l,t_h,t_l;
+	    /* Avoid internal underflow for tiny y.  The exact value
+	       of y does not matter if |y| <= 2**-32.  */
+	    if (iy < 0x2f800000)
+	      SET_FLOAT_WORD (y, (hy & 0x80000000) | 0x2f800000);
 	    n = 0;
 	/* take care subnormal number */
 	    if(ix<0x00800000)
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@ -38,6 +38,9 @@ p64:	.byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
 	.type p78,@object
 p78:	.byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44
 	ASM_SIZE_DIRECTIVE(p78)
+	.type pm79,@object
+pm79:	.byte 0, 0, 0, 0, 0, 0, 0, 0x3b
+	ASM_SIZE_DIRECTIVE(pm79)

 	.section .rodata.cst16,"aM",@progbits,16

@ -110,9 +113,25 @@ ENTRY(__ieee754_powl)
 	fistpll	-8(%rsp)	// y : x
 	fildll	-8(%rsp)	// int(y) : y : x
 	fucomip	%st(1),%st	// y : x
-	jne	3f
+	je	9f

-	/* OK, we have an integer value for y.  */
+	// If y has absolute value at most 0x1p-79, then any finite
+	// nonzero x will result in 1.  Saturate y to those bounds to
+	// avoid underflow in the calculation of y*log2(x).
+	fldl	MO(pm79)	// 0x1p-79 : y : x
+	fld	%st(1)		// y : 0x1p-79 : y : x
+	fabs			// |y| : 0x1p-79 : y : x
+	fcomip	%st(1), %st	// 0x1p-79 : y : x
+	fstp	%st(0)		// y : x
+	jnc	3f
+	fstp	%st(0)		// pop y
+	fldl	MO(pm79)	// 0x1p-79 : x
+	testb	$2, %dl
+	jnz	3f		// y > 0
+	fchs			// -0x1p-79 : x
+	jmp	3f
+
+9:	/* OK, we have an integer value for y.  */
 	mov	-8(%rsp),%eax
 	mov	-4(%rsp),%edx
 	orl	$0, %edx