2001-02-19  Ulrich Drepper  <drepper@redhat.com>

	* libio/iogetline.c: Move return until after last statement.

	* localedata/show-ucs-data.c: Don't show < > for better readability.

	* sysdeps/ia64/fpu/Dist: New file.
	* sysdeps/ia64/fpu/Makefile: New file.
	* sysdeps/ia64/fpu/Versions: New file.
	* sysdeps/ia64/fpu/e_acos.S: New file.
	* sysdeps/ia64/fpu/e_acosf.S: New file.
	* sysdeps/ia64/fpu/e_acosl.S: New file.
	* sysdeps/ia64/fpu/e_asin.S: New file.
	* sysdeps/ia64/fpu/e_asinf.S: New file.
	* sysdeps/ia64/fpu/e_asinl.S: New file.
	* sysdeps/ia64/fpu/e_atan2.S: New file.
	* sysdeps/ia64/fpu/e_atan2f.S: New file.
	* sysdeps/ia64/fpu/e_atan2l.c: New file.
	* sysdeps/ia64/fpu/e_cosh.S: New file.
	* sysdeps/ia64/fpu/e_coshf.S: New file.
	* sysdeps/ia64/fpu/e_coshl.S: New file.
	* sysdeps/ia64/fpu/e_exp.S: New file.
	* sysdeps/ia64/fpu/e_expf.S: New file.
	* sysdeps/ia64/fpu/e_expl.c: New file.
	* sysdeps/ia64/fpu/e_fmod.S: New file.
	* sysdeps/ia64/fpu/e_fmodf.S: New file.
	* sysdeps/ia64/fpu/e_fmodl.S: New file.
	* sysdeps/ia64/fpu/e_hypot.S: New file.
	* sysdeps/ia64/fpu/e_hypotf.S: New file.
	* sysdeps/ia64/fpu/e_hypotl.S: New file.
	* sysdeps/ia64/fpu/e_log.S: New file.
	* sysdeps/ia64/fpu/e_log10.c: New file.
	* sysdeps/ia64/fpu/e_log10f.c: New file.
	* sysdeps/ia64/fpu/e_log10l.c: New file.
	* sysdeps/ia64/fpu/e_logf.S: New file.
	* sysdeps/ia64/fpu/e_logl.c: New file.
	* sysdeps/ia64/fpu/e_pow.S: New file.
	* sysdeps/ia64/fpu/e_powf.S: New file.
	* sysdeps/ia64/fpu/e_powl.S: New file.
	* sysdeps/ia64/fpu/e_rem_pio2.c: New file.
	* sysdeps/ia64/fpu/e_rem_pio2f.c: New file.
	* sysdeps/ia64/fpu/e_remainder.S: New file.
	* sysdeps/ia64/fpu/e_remainderf.S: New file.
	* sysdeps/ia64/fpu/e_remainderl.S: New file.
	* sysdeps/ia64/fpu/e_scalb.S: New file.
	* sysdeps/ia64/fpu/e_scalbf.S: New file.
	* sysdeps/ia64/fpu/e_scalbl.S: New file.
	* sysdeps/ia64/fpu/e_sinh.S: New file.
	* sysdeps/ia64/fpu/e_sinhf.S: New file.
	* sysdeps/ia64/fpu/e_sinhl.S: New file.
	* sysdeps/ia64/fpu/e_sqrt.S: New file.
	* sysdeps/ia64/fpu/e_sqrtf.S: New file.
	* sysdeps/ia64/fpu/e_sqrtl.S: New file.
	* sysdeps/ia64/fpu/k_rem_pio2.c: New file.
	* sysdeps/ia64/fpu/k_rem_pio2f.c: New file.
	* sysdeps/ia64/fpu/k_rem_pio2l.c: New file.
	* sysdeps/ia64/fpu/libm_atan2_reg.S: New file.
	* sysdeps/ia64/fpu/libm_error.c: New file.
	* sysdeps/ia64/fpu/libm_frexp4.S: New file.
	* sysdeps/ia64/fpu/libm_frexp4f.S: New file.
	* sysdeps/ia64/fpu/libm_frexp4l.S: New file.
	* sysdeps/ia64/fpu/libm_reduce.S: New file.
	* sysdeps/ia64/fpu/libm_support.h: New file.
	* sysdeps/ia64/fpu/libm_tan.S: New file.
	* sysdeps/ia64/fpu/s_atan.S: New file.
	* sysdeps/ia64/fpu/s_atanf.S: New file.
	* sysdeps/ia64/fpu/s_atanl.S: New file.
	* sysdeps/ia64/fpu/s_cbrt.S: New file.
	* sysdeps/ia64/fpu/s_cbrtf.S: New file.
	* sysdeps/ia64/fpu/s_cbrtl.S: New file.
	* sysdeps/ia64/fpu/s_ceil.S: New file.
	* sysdeps/ia64/fpu/s_ceilf.S: New file.
	* sysdeps/ia64/fpu/s_ceill.S: New file.
	* sysdeps/ia64/fpu/s_cos.S: New file.
	* sysdeps/ia64/fpu/s_cosf.S: New file.
	* sysdeps/ia64/fpu/s_cosl.S: New file.
	* sysdeps/ia64/fpu/s_expm1.S: New file.
	* sysdeps/ia64/fpu/s_expm1f.S: New file.
	* sysdeps/ia64/fpu/s_expm1l.S: New file.
	* sysdeps/ia64/fpu/s_floor.S: New file.
	* sysdeps/ia64/fpu/s_floorf.S: New file.
	* sysdeps/ia64/fpu/s_floorl.S: New file.
	* sysdeps/ia64/fpu/s_frexp.c: New file.
	* sysdeps/ia64/fpu/s_frexpf.c: New file.
	* sysdeps/ia64/fpu/s_frexpl.c: New file.
	* sysdeps/ia64/fpu/s_ilogb.S: New file.
	* sysdeps/ia64/fpu/s_ilogbf.S: New file.
	* sysdeps/ia64/fpu/s_ilogbl.S: New file.
	* sysdeps/ia64/fpu/s_ldexp.S: New file.
	* sysdeps/ia64/fpu/s_ldexpf.S: New file.
	* sysdeps/ia64/fpu/s_ldexpl.S: New file.
	* sysdeps/ia64/fpu/s_log1p.S: New file.
	* sysdeps/ia64/fpu/s_log1pf.S: New file.
	* sysdeps/ia64/fpu/s_log1pl.S: New file.
	* sysdeps/ia64/fpu/s_logb.S: New file.
	* sysdeps/ia64/fpu/s_logbf.S: New file.
	* sysdeps/ia64/fpu/s_logbl.S: New file.
	* sysdeps/ia64/fpu/s_matherrf.c: New file.
	* sysdeps/ia64/fpu/s_matherrl.c: New file.
	* sysdeps/ia64/fpu/s_modf.S: New file.
	* sysdeps/ia64/fpu/s_modff.S: New file.
	* sysdeps/ia64/fpu/s_modfl.S: New file.
	* sysdeps/ia64/fpu/s_nearbyint.S: New file.
	* sysdeps/ia64/fpu/s_nearbyintf.S: New file.
	* sysdeps/ia64/fpu/s_nearbyintl.S: New file.
	* sysdeps/ia64/fpu/s_rint.S: New file.
	* sysdeps/ia64/fpu/s_rintf.S: New file.
	* sysdeps/ia64/fpu/s_rintl.S: New file.
	* sysdeps/ia64/fpu/s_round.S: New file.
	* sysdeps/ia64/fpu/s_roundf.S: New file.
	* sysdeps/ia64/fpu/s_roundl.S: New file.
	* sysdeps/ia64/fpu/s_scalbn.S: New file.
	* sysdeps/ia64/fpu/s_scalbnf.S: New file.
	* sysdeps/ia64/fpu/s_scalbnl.S: New file.
	* sysdeps/ia64/fpu/s_significand.S: New file.
	* sysdeps/ia64/fpu/s_significandf.S: New file.
	* sysdeps/ia64/fpu/s_significandl.S: New file.
	* sysdeps/ia64/fpu/s_sin.c: New file.
	* sysdeps/ia64/fpu/s_sincos.c: New file.
	* sysdeps/ia64/fpu/s_sincosf.c: New file.
	* sysdeps/ia64/fpu/s_sincosl.c: New file.
	* sysdeps/ia64/fpu/s_sinf.c: New file.
	* sysdeps/ia64/fpu/s_sinl.c: New file.
	* sysdeps/ia64/fpu/s_tan.S: New file.
	* sysdeps/ia64/fpu/s_tanf.S: New file.
	* sysdeps/ia64/fpu/s_tanl.S: New file.
	* sysdeps/ia64/fpu/s_trunc.S: New file.
	* sysdeps/ia64/fpu/s_truncf.S: New file.
	* sysdeps/ia64/fpu/s_truncl.S: New file.
	* sysdeps/ia64/fpu/w_acos.c: New file.
	* sysdeps/ia64/fpu/w_acosf.c: New file.
	* sysdeps/ia64/fpu/w_acosl.c: New file.
	* sysdeps/ia64/fpu/w_asin.c: New file.
	* sysdeps/ia64/fpu/w_asinf.c: New file.
	* sysdeps/ia64/fpu/w_asinl.c: New file.
	* sysdeps/ia64/fpu/w_atan2.c: New file.
	* sysdeps/ia64/fpu/w_atan2f.c: New file.
	* sysdeps/ia64/fpu/w_atan2l.c: New file.
	* sysdeps/ia64/fpu/w_cosh.c: New file.
	* sysdeps/ia64/fpu/w_coshf.c: New file.
	* sysdeps/ia64/fpu/w_coshl.c: New file.
	* sysdeps/ia64/fpu/w_exp.c: New file.
	* sysdeps/ia64/fpu/w_expf.c: New file.
	* sysdeps/ia64/fpu/w_fmod.c: New file.
	* sysdeps/ia64/fpu/w_fmodf.c: New file.
	* sysdeps/ia64/fpu/w_fmodl.c: New file.
	* sysdeps/ia64/fpu/w_hypot.c: New file.
	* sysdeps/ia64/fpu/w_hypotf.c: New file.
	* sysdeps/ia64/fpu/w_hypotl.c: New file.
	* sysdeps/ia64/fpu/w_log.c: New file.
	* sysdeps/ia64/fpu/w_log10.c: New file.
	* sysdeps/ia64/fpu/w_log10f.c: New file.
	* sysdeps/ia64/fpu/w_log10l.c: New file.
	* sysdeps/ia64/fpu/w_logf.c: New file.
	* sysdeps/ia64/fpu/w_logl.c: New file.
	* sysdeps/ia64/fpu/w_pow.c: New file.
	* sysdeps/ia64/fpu/w_powf.c: New file.
	* sysdeps/ia64/fpu/w_powl.c: New file.
	* sysdeps/ia64/fpu/w_remainder.c: New file.
	* sysdeps/ia64/fpu/w_remainderf.c: New file.
	* sysdeps/ia64/fpu/w_remainderl.c: New file.
	* sysdeps/ia64/fpu/w_scalb.c: New file.
	* sysdeps/ia64/fpu/w_scalbf.c: New file.
	* sysdeps/ia64/fpu/w_scalbl.c: New file.
	* sysdeps/ia64/fpu/w_sqrt.c: New file.
	* sysdeps/ia64/fpu/w_sqrtf.c: New file.
	* sysdeps/ia64/fpu/w_sqrtl.c: New file.
	* sysdeps/ia64/fpu/libm-test-ulps: Adjust for long double
	implementation.
	* sysdeps/ia64/fpu/bits/mathdef.h: Correct float_t and double_t types.
	Change FP_ILOGBNAN for new implementation.
	* Versions.def: Add 2.2.3 versions.
This commit is contained in:
Ulrich Drepper 2001-02-19 09:09:18 +00:00
parent e208f556ca
commit 8da2915d5d
167 changed files with 80568 additions and 101 deletions

173
ChangeLog
View File

@ -1,3 +1,176 @@
2001-02-19 Ulrich Drepper <drepper@redhat.com>
* libio/iogetline.c: Move return until after last statement.
* localedata/show-ucs-data.c: Don't show < > for better readability.
* sysdeps/ia64/fpu/Dist: New file.
* sysdeps/ia64/fpu/Makefile: New file.
* sysdeps/ia64/fpu/Versions: New file.
* sysdeps/ia64/fpu/e_acos.S: New file.
* sysdeps/ia64/fpu/e_acosf.S: New file.
* sysdeps/ia64/fpu/e_acosl.S: New file.
* sysdeps/ia64/fpu/e_asin.S: New file.
* sysdeps/ia64/fpu/e_asinf.S: New file.
* sysdeps/ia64/fpu/e_asinl.S: New file.
* sysdeps/ia64/fpu/e_atan2.S: New file.
* sysdeps/ia64/fpu/e_atan2f.S: New file.
* sysdeps/ia64/fpu/e_atan2l.c: New file.
* sysdeps/ia64/fpu/e_cosh.S: New file.
* sysdeps/ia64/fpu/e_coshf.S: New file.
* sysdeps/ia64/fpu/e_coshl.S: New file.
* sysdeps/ia64/fpu/e_exp.S: New file.
* sysdeps/ia64/fpu/e_expf.S: New file.
* sysdeps/ia64/fpu/e_expl.c: New file.
* sysdeps/ia64/fpu/e_fmod.S: New file.
* sysdeps/ia64/fpu/e_fmodf.S: New file.
* sysdeps/ia64/fpu/e_fmodl.S: New file.
* sysdeps/ia64/fpu/e_hypot.S: New file.
* sysdeps/ia64/fpu/e_hypotf.S: New file.
* sysdeps/ia64/fpu/e_hypotl.S: New file.
* sysdeps/ia64/fpu/e_log.S: New file.
* sysdeps/ia64/fpu/e_log10.c: New file.
* sysdeps/ia64/fpu/e_log10f.c: New file.
* sysdeps/ia64/fpu/e_log10l.c: New file.
* sysdeps/ia64/fpu/e_logf.S: New file.
* sysdeps/ia64/fpu/e_logl.c: New file.
* sysdeps/ia64/fpu/e_pow.S: New file.
* sysdeps/ia64/fpu/e_powf.S: New file.
* sysdeps/ia64/fpu/e_powl.S: New file.
* sysdeps/ia64/fpu/e_rem_pio2.c: New file.
* sysdeps/ia64/fpu/e_rem_pio2f.c: New file.
* sysdeps/ia64/fpu/e_remainder.S: New file.
* sysdeps/ia64/fpu/e_remainderf.S: New file.
* sysdeps/ia64/fpu/e_remainderl.S: New file.
* sysdeps/ia64/fpu/e_scalb.S: New file.
* sysdeps/ia64/fpu/e_scalbf.S: New file.
* sysdeps/ia64/fpu/e_scalbl.S: New file.
* sysdeps/ia64/fpu/e_sinh.S: New file.
* sysdeps/ia64/fpu/e_sinhf.S: New file.
* sysdeps/ia64/fpu/e_sinhl.S: New file.
* sysdeps/ia64/fpu/e_sqrt.S: New file.
* sysdeps/ia64/fpu/e_sqrtf.S: New file.
* sysdeps/ia64/fpu/e_sqrtl.S: New file.
* sysdeps/ia64/fpu/k_rem_pio2.c: New file.
* sysdeps/ia64/fpu/k_rem_pio2f.c: New file.
* sysdeps/ia64/fpu/k_rem_pio2l.c: New file.
* sysdeps/ia64/fpu/libm_atan2_reg.S: New file.
* sysdeps/ia64/fpu/libm_error.c: New file.
* sysdeps/ia64/fpu/libm_frexp4.S: New file.
* sysdeps/ia64/fpu/libm_frexp4f.S: New file.
* sysdeps/ia64/fpu/libm_frexp4l.S: New file.
* sysdeps/ia64/fpu/libm_reduce.S: New file.
* sysdeps/ia64/fpu/libm_support.h: New file.
* sysdeps/ia64/fpu/libm_tan.S: New file.
* sysdeps/ia64/fpu/s_atan.S: New file.
* sysdeps/ia64/fpu/s_atanf.S: New file.
* sysdeps/ia64/fpu/s_atanl.S: New file.
* sysdeps/ia64/fpu/s_cbrt.S: New file.
* sysdeps/ia64/fpu/s_cbrtf.S: New file.
* sysdeps/ia64/fpu/s_cbrtl.S: New file.
* sysdeps/ia64/fpu/s_ceil.S: New file.
* sysdeps/ia64/fpu/s_ceilf.S: New file.
* sysdeps/ia64/fpu/s_ceill.S: New file.
* sysdeps/ia64/fpu/s_cos.S: New file.
* sysdeps/ia64/fpu/s_cosf.S: New file.
* sysdeps/ia64/fpu/s_cosl.S: New file.
* sysdeps/ia64/fpu/s_expm1.S: New file.
* sysdeps/ia64/fpu/s_expm1f.S: New file.
* sysdeps/ia64/fpu/s_expm1l.S: New file.
* sysdeps/ia64/fpu/s_floor.S: New file.
* sysdeps/ia64/fpu/s_floorf.S: New file.
* sysdeps/ia64/fpu/s_floorl.S: New file.
* sysdeps/ia64/fpu/s_frexp.c: New file.
* sysdeps/ia64/fpu/s_frexpf.c: New file.
* sysdeps/ia64/fpu/s_frexpl.c: New file.
* sysdeps/ia64/fpu/s_ilogb.S: New file.
* sysdeps/ia64/fpu/s_ilogbf.S: New file.
* sysdeps/ia64/fpu/s_ilogbl.S: New file.
* sysdeps/ia64/fpu/s_ldexp.S: New file.
* sysdeps/ia64/fpu/s_ldexpf.S: New file.
* sysdeps/ia64/fpu/s_ldexpl.S: New file.
* sysdeps/ia64/fpu/s_log1p.S: New file.
* sysdeps/ia64/fpu/s_log1pf.S: New file.
* sysdeps/ia64/fpu/s_log1pl.S: New file.
* sysdeps/ia64/fpu/s_logb.S: New file.
* sysdeps/ia64/fpu/s_logbf.S: New file.
* sysdeps/ia64/fpu/s_logbl.S: New file.
* sysdeps/ia64/fpu/s_matherrf.c: New file.
* sysdeps/ia64/fpu/s_matherrl.c: New file.
* sysdeps/ia64/fpu/s_modf.S: New file.
* sysdeps/ia64/fpu/s_modff.S: New file.
* sysdeps/ia64/fpu/s_modfl.S: New file.
* sysdeps/ia64/fpu/s_nearbyint.S: New file.
* sysdeps/ia64/fpu/s_nearbyintf.S: New file.
* sysdeps/ia64/fpu/s_nearbyintl.S: New file.
* sysdeps/ia64/fpu/s_rint.S: New file.
* sysdeps/ia64/fpu/s_rintf.S: New file.
* sysdeps/ia64/fpu/s_rintl.S: New file.
* sysdeps/ia64/fpu/s_round.S: New file.
* sysdeps/ia64/fpu/s_roundf.S: New file.
* sysdeps/ia64/fpu/s_roundl.S: New file.
* sysdeps/ia64/fpu/s_scalbn.S: New file.
* sysdeps/ia64/fpu/s_scalbnf.S: New file.
* sysdeps/ia64/fpu/s_scalbnl.S: New file.
* sysdeps/ia64/fpu/s_significand.S: New file.
* sysdeps/ia64/fpu/s_significandf.S: New file.
* sysdeps/ia64/fpu/s_significandl.S: New file.
* sysdeps/ia64/fpu/s_sin.c: New file.
* sysdeps/ia64/fpu/s_sincos.c: New file.
* sysdeps/ia64/fpu/s_sincosf.c: New file.
* sysdeps/ia64/fpu/s_sincosl.c: New file.
* sysdeps/ia64/fpu/s_sinf.c: New file.
* sysdeps/ia64/fpu/s_sinl.c: New file.
* sysdeps/ia64/fpu/s_tan.S: New file.
* sysdeps/ia64/fpu/s_tanf.S: New file.
* sysdeps/ia64/fpu/s_tanl.S: New file.
* sysdeps/ia64/fpu/s_trunc.S: New file.
* sysdeps/ia64/fpu/s_truncf.S: New file.
* sysdeps/ia64/fpu/s_truncl.S: New file.
* sysdeps/ia64/fpu/w_acos.c: New file.
* sysdeps/ia64/fpu/w_acosf.c: New file.
* sysdeps/ia64/fpu/w_acosl.c: New file.
* sysdeps/ia64/fpu/w_asin.c: New file.
* sysdeps/ia64/fpu/w_asinf.c: New file.
* sysdeps/ia64/fpu/w_asinl.c: New file.
* sysdeps/ia64/fpu/w_atan2.c: New file.
* sysdeps/ia64/fpu/w_atan2f.c: New file.
* sysdeps/ia64/fpu/w_atan2l.c: New file.
* sysdeps/ia64/fpu/w_cosh.c: New file.
* sysdeps/ia64/fpu/w_coshf.c: New file.
* sysdeps/ia64/fpu/w_coshl.c: New file.
* sysdeps/ia64/fpu/w_exp.c: New file.
* sysdeps/ia64/fpu/w_expf.c: New file.
* sysdeps/ia64/fpu/w_fmod.c: New file.
* sysdeps/ia64/fpu/w_fmodf.c: New file.
* sysdeps/ia64/fpu/w_fmodl.c: New file.
* sysdeps/ia64/fpu/w_hypot.c: New file.
* sysdeps/ia64/fpu/w_hypotf.c: New file.
* sysdeps/ia64/fpu/w_hypotl.c: New file.
* sysdeps/ia64/fpu/w_log.c: New file.
* sysdeps/ia64/fpu/w_log10.c: New file.
* sysdeps/ia64/fpu/w_log10f.c: New file.
* sysdeps/ia64/fpu/w_log10l.c: New file.
* sysdeps/ia64/fpu/w_logf.c: New file.
* sysdeps/ia64/fpu/w_logl.c: New file.
* sysdeps/ia64/fpu/w_pow.c: New file.
* sysdeps/ia64/fpu/w_powf.c: New file.
* sysdeps/ia64/fpu/w_powl.c: New file.
* sysdeps/ia64/fpu/w_remainder.c: New file.
* sysdeps/ia64/fpu/w_remainderf.c: New file.
* sysdeps/ia64/fpu/w_remainderl.c: New file.
* sysdeps/ia64/fpu/w_scalb.c: New file.
* sysdeps/ia64/fpu/w_scalbf.c: New file.
* sysdeps/ia64/fpu/w_scalbl.c: New file.
* sysdeps/ia64/fpu/w_sqrt.c: New file.
* sysdeps/ia64/fpu/w_sqrtf.c: New file.
* sysdeps/ia64/fpu/w_sqrtl.c: New file.
* sysdeps/ia64/fpu/libm-test-ulps: Adjust for long double
implementation.
* sysdeps/ia64/fpu/bits/mathdef.h: Correct float_t and double_t types.
Change FP_ILOGBNAN for new implementation.
* Versions.def: Add 2.2.3 versions.
2001-02-18 Ulrich Drepper <drepper@redhat.com>
* math/libm-test.inc (scalb_test): Require invalid exception being

View File

@ -11,6 +11,7 @@ libc {
GLIBC_2.2
GLIBC_2.2.1
GLIBC_2.2.2
GLIBC_2.2.3
%ifdef USE_IN_LIBIO
HURD_CTHREADS_0.3
%endif
@ -31,6 +32,7 @@ libm {
GLIBC_2.0
GLIBC_2.1
GLIBC_2.2
GLIBC_2.2.3
}
libnsl {
GLIBC_2.0

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1993, 1997, 1998, 2000 Free Software Foundation, Inc.
/* Copyright (C) 1993, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
This file is part of the GNU IO Library.
This library is free software; you can redistribute it and/or
@ -78,9 +78,9 @@ _IO_getline_info (fp, buf, n, delim, extract_delim, eof)
*ptr++ = c;
else if (extract_delim < 0)
_IO_sputbackc (fp, c);
return ptr - buf;
if (extract_delim > 0)
++len;
return ptr - buf;
}
*ptr++ = c;
n--;

View File

@ -32,7 +32,7 @@ main (int argc, char *argv[])
{
unsigned int val = strtoul (runp + 2, NULL, 16);
putchar ('<');
//putchar ('<');
if (val < 128)
putchar (val);
else if (val < 0x800)
@ -46,7 +46,7 @@ main (int argc, char *argv[])
putchar (0x80 | ((val >> 6) & 0x3f));
putchar (0x80 | (val & 0x3f));
}
putchar ('>');
//putchar ('>');
runp += 7;
}
else

6
sysdeps/ia64/fpu/Dist Normal file
View File

@ -0,0 +1,6 @@
libm_atan2_reg.S
libm_error.c
libm_reduce.S
libm_support.h
s_matherrf
s_matherrl

View File

@ -0,0 +1,7 @@
# Additions that apply only while building the math subdirectory.
ifeq ($(subdir),math)
# Extra objects appended to the libm-sysdep_routines list.
libm-sysdep_routines += libm_atan2_reg s_matherrf s_matherrl libm_reduce \
libm_tan
# Objects appended to the generic `routines' list.
# NOTE(review): the accompanying Versions file exports __libm_frexp_4*,
# and __libm_error_support from libc, which presumably corresponds to
# these objects -- confirm.
routines += libm_frexp4 libm_frexp4f libm_frexp4l libm_error
# Extra preprocessor define for compiles in this subdirectory.
CPPFLAGS += -DSIZE_INT_32
endif

10
sysdeps/ia64/fpu/Versions Normal file
View File

@ -0,0 +1,10 @@
libc {
GLIBC_2.2.3 {
__libm_frexp_4; __libm_frexp_4f; __libm_frexp_4l; __libm_error_support;
}
}
libm {
GLIBC_2.2.3 {
matherrf; matherrl;
}
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2000 Free Software Foundation, Inc.
/* Copyright (C) 2000, 2001 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -23,19 +23,18 @@
#if defined __USE_ISOC99 && defined _MATH_H && !defined _MATH_H_MATHDEF
# define _MATH_H_MATHDEF 1
/* The ix87 FPUs evaluate all values in the 80 bit floating-point format
which is also available for the user as `long double'. Therefore we
define: */
typedef long double float_t; /* `float' expressions are evaluated as
`long double'. */
typedef long double double_t; /* `double' expressions are evaluated as
`long double'. */
/* The IA-64 architecture computes values with the precision of the
used type. */
typedef float float_t; /* `float' expressions are evaluated as
`float'. */
typedef double double_t; /* `double' expressions are evaluated as
`double'. */
/* Define `INFINITY' as value of type `float'. */
# define INFINITY HUGE_VALF
/* The values returned by `ilogb' for 0 and NaN respectively. */
# define FP_ILOGB0 (-2147483647 - 1)
# define FP_ILOGBNAN (-2147483647 - 1)
# define FP_ILOGBNAN 2147483647
#endif /* ISO C99 */

904
sysdeps/ia64/fpu/e_acos.S Normal file
View File

@ -0,0 +1,904 @@
.file "acos.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// History
//==============================================================
// 2/02/00 Initial version
// 8/17/00 New and much faster algorithm.
// 8/30/00 Avoided bank conflicts on loads, shortened |x|=1 and x=0 paths,
// fixed mfb split issue stalls.
// Description
//=========================================
// The acos function computes the principal value of the arc cosine of x.
// A domain error occurs for arguments not in the range [-1,+1].
// The acos function returns the arc cosine in the range [0, +pi] radians.
// acos(1) returns +0, acos(-1) returns pi, acos(0) returns pi/2.
// acos(x) returns a NaN and raises the invalid exception for |x| > 1.
// The acos function is just like asin except that pi/2 is added at the end.
//
// Assembly macros
//=========================================
#include "libm_support.h"
// predicate registers
//acos_pred_LEsqrt2by2 = p7
//acos_pred_GTsqrt2by2 = p8
// integer registers
// The alloc in acos declares 1 input (r32), 6 locals (r33-r38) and
// 4 outputs (r39-r42).
// ASIN_Addr1 / ASIN_Addr2 hold the addresses of acos_coeff_1_table and
// acos_coeff_2_table and are post-incremented by the ldfe loads.
// ASIN_FFFE holds the exponent value 0xFFFE used to build acos_1by2.
ASIN_Addr1 = r33
ASIN_Addr2 = r34
ASIN_FFFE = r35
// Locals used by __libm_error_region to preserve b0, ar.pfs and gp
// across the call to __libm_error_support.
GR_SAVE_B0 = r36
GR_SAVE_PFS = r37
GR_SAVE_GP = r38
// The four output registers: addresses of the stack slots for the input,
// the placeholder second parameter and the result, plus the error tag.
GR_Parameter_X = r39
GR_Parameter_Y = r40
GR_Parameter_RESULT = r41
GR_Parameter_Tag = r42
// floating point registers
// Polynomial coefficients P1..P21, loaded from the two coefficient tables.
acos_coeff_P1 = f32
acos_coeff_P2 = f33
acos_coeff_P3 = f34
acos_coeff_P4 = f35
acos_coeff_P5 = f36
acos_coeff_P6 = f37
acos_coeff_P7 = f38
acos_coeff_P8 = f39
acos_coeff_P9 = f40
acos_coeff_P10 = f41
acos_coeff_P11 = f42
acos_coeff_P12 = f43
acos_coeff_P13 = f44
acos_coeff_P14 = f45
acos_coeff_P15 = f46
acos_coeff_P16 = f47
acos_coeff_P17 = f48
acos_coeff_P18 = f49
acos_coeff_P19 = f50
acos_coeff_P20 = f51
acos_coeff_P21 = f52
// Constants stored at the ends of the two tables.
acos_const_sqrt2by2 = f53
acos_const_piby2 = f54
// |x|, powers of tx = x*x, and partial sums of the polynomial in tx.
acos_abs_x = f55
acos_tx = f56
acos_tx2 = f57
acos_tx3 = f58
acos_tx4 = f59
acos_tx8 = f60
acos_tx11 = f61
acos_1poly_p8 = f62
acos_1poly_p19 = f63
acos_1poly_p4 = f64
acos_1poly_p15 = f65
acos_1poly_p6 = f66
acos_1poly_p17 = f67
acos_1poly_p0 = f68
acos_1poly_p11 = f69
acos_1poly_p2 = f70
acos_1poly_p13 = f71
acos_series_tx = f72
// Powers of t = 1 - x*x and partial sums of the polynomial in t.
acos_t = f73
acos_t2 = f74
acos_t3 = f75
acos_t4 = f76
acos_t8 = f77
acos_t11 = f78
acos_poly_p8 = f79
acos_poly_p19 = f80
acos_poly_p4 = f81
acos_poly_p15 = f82
acos_poly_p6 = f83
acos_poly_p17 = f84
acos_poly_p0 = f85
acos_poly_p11 = f86
acos_poly_p2 = f87
acos_poly_p13 = f88
acos_series_t = f89
// Small rational constants assembled at run time from 0.5 and 1.0.
acos_1by2 = f90
acos_3by2 = f91
acos_5by2 = f92
acos_11by4 = f93
acos_35by8 = f94
acos_63by8 = f95
acos_231by16 = f96
// Intermediates of the computation that starts from the frsqrta estimate
// y0 and ends in acos_z (which appears to approximate sqrt(t)).
acos_y0 = f97
acos_H0 = f98
acos_S0 = f99
acos_d = f100
acos_l1 = f101
acos_d2 = f102
acos_T0 = f103
acos_d1 = f104
acos_e0 = f105
acos_l2 = f106
acos_d3 = f107
acos_T3 = f108
acos_S1 = f109
acos_e1 = f110
acos_z = f111
// Staging value for the |x| > sqrt(2)/2 path, and sign(x) * 1.0.
answer2 = f112
acos_sgn_x = f113
// Further run-time constants and intermediates of the acos_z computation.
acos_429by16 = f114
acos_18by4 = f115
acos_3by4 = f116
acos_l3 = f117
acos_T6 = f118
// Additive constant of the final result: pi when x < 0, otherwise 0.
acos_const_add = f119
// Data tables
//==============================================================
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
// Coefficient table read through ASIN_Addr1.  Every entry occupies 16 bytes
// (two data8 words: the 64-bit significand, then the sign/exponent word) and
// is fetched with ldfe using a post-increment of 16.  The entries are stored
// in the order in which acos loads them (P7, P18, P5, ...), not in order of
// polynomial degree, so the table and the load sequence must be kept in sync.
acos_coeff_1_table:
ASM_TYPE_DIRECTIVE(acos_coeff_1_table,@object)
data8 0xE4E7E0A423A21249 , 0x00003FF8 //P7
data8 0xC2F7EE0200FCE2A5 , 0x0000C003 //P18
data8 0xB745D7F6C65C20E0 , 0x00003FF9 //P5
data8 0xF75E381A323D4D94 , 0x0000C002 //P16
data8 0x8959C2629C1024C0 , 0x0000C002 //P20
data8 0xAFF68E7D241292C5 , 0x00003FF8 //P9
data8 0xB6DB6DB7260AC30D , 0x00003FFA //P3
data8 0xD0417CE2B41CB7BF , 0x0000C000 //P14
data8 0x81D570FEA724E3E4 , 0x0000BFFD //P12
data8 0xAAAAAAAAAAAAC277 , 0x00003FFC //P1
data8 0xF534912FF3E7B76F , 0x00003FFF //P21
data8 0xc90fdaa22168c235 , 0x00003fff // pi/2
data8 0x0000000000000000 , 0x00000000 // pad to avoid bank conflicts
ASM_SIZE_DIRECTIVE(acos_coeff_1_table)
// Coefficient table read through ASIN_Addr2.  Same 16-byte entry layout as
// acos_coeff_1_table (significand word, then sign/exponent word), fetched
// with ldfe and a post-increment of 16.  The entries are stored in the order
// in which acos loads them (P6, P17, P4, ...); the last entry is sqrt(2)/2,
// the threshold that selects between the two final formulas.
acos_coeff_2_table:
ASM_TYPE_DIRECTIVE(acos_coeff_2_table,@object)
data8 0x8E26AF5F29B39A2A , 0x00003FF9 //P6
data8 0xB4F118A4B1015470 , 0x00004003 //P17
data8 0xF8E38E10C25990E0 , 0x00003FF9 //P4
data8 0x80F50489AEF1CAC6 , 0x00004002 //P15
data8 0x92728015172CFE1C , 0x00004003 //P19
data8 0xBBC3D831D4595971 , 0x00003FF8 //P8
data8 0x999999999952A5C3 , 0x00003FFB //P2
data8 0x855576BE6F0975EC , 0x00003FFF //P13
data8 0xF12420E778077D89 , 0x00003FFA //P11
data8 0xB6590FF4D23DE003 , 0x00003FF3 //P10
data8 0xb504f333f9de6484 , 0x00003ffe // sqrt(2)/2
ASM_SIZE_DIRECTIVE(acos_coeff_2_table)
.align 32
.global acos
ASM_TYPE_DIRECTIVE(acos,@function)
.section .text
.proc acos
.align 32
// acos(x): the argument arrives in f8 and the double-precision result is
// returned in f8.
//
// Outline of the code below:
//   - NaN input: returned after an f8*1+0 operation (p8 path).
//   - |x| > 1:   GR_Parameter_Tag is set to 58 and control transfers to
//                __libm_error_region.
//   - x = 0 and |x| = 1 branch to the short paths ACOS_ZERO / ACOS_ABS_ONE.
//   - |x| <= sqrt(2)/2:  result = pi/2 - x * series_tx,  a polynomial in
//                tx = x*x.
//   - otherwise: result = c + sgn(x) * z * series_t,  a polynomial in
//                t = 1 - x*x, where c = pi for x < 0 and 0 otherwise and
//                z is derived from the frsqrta estimate of 1/sqrt(t).
//
// Both polynomials are evaluated unconditionally and interleaved with the
// coefficient loads; only the last instructions are predicated on the range.
// Note that p8 is used twice: first as the NaN predicate, later as the
// "|x| > sqrt(2)/2" predicate.
acos:
// tx = x*x and t = 1 - x*x; form the linkage-table addresses of both tables.
{ .mfi
alloc r32 = ar.pfs,1,6,4,0
fma.s1 acos_tx = f8,f8,f0
addl ASIN_Addr2 = @ltoff(acos_coeff_2_table),gp
}
{ .mfi
mov ASIN_FFFE = 0xFFFE
fnma.s1 acos_t = f8,f8,f1
addl ASIN_Addr1 = @ltoff(acos_coeff_1_table),gp
}
;;
// acos_1by2 = 0.5 (built from the exponent value); acos_abs_x = |x|.
{ .mfi
setf.exp acos_1by2 = ASIN_FFFE
fmerge.s acos_abs_x = f1,f8
nop.i 999 ;;
}
// Fetch the real table addresses; acos_sgn_x = 1.0 with the sign of x.
{ .mmf
ld8 ASIN_Addr1 = [ASIN_Addr1]
ld8 ASIN_Addr2 = [ASIN_Addr2]
fmerge.s acos_sgn_x = f8,f1
}
;;
// p11 = (x < 0), p12 = the complement.
{ .mfi
nop.m 999
fcmp.lt.s1 p11,p12 = f8, f0
nop.i 999 ;;
}
// Coefficient loads start here and alternate between the two tables.
{ .mfi
ldfe acos_coeff_P7 = [ASIN_Addr1],16
fma.s1 acos_tx2 = acos_tx,acos_tx,f0
nop.i 999
}
{ .mfi
ldfe acos_coeff_P6 = [ASIN_Addr2],16
fma.s1 acos_t2 = acos_t,acos_t,f0
nop.i 999;;
}
// p8 = x is a NaN.
{ .mmf
ldfe acos_coeff_P18 = [ASIN_Addr1],16
ldfe acos_coeff_P17 = [ASIN_Addr2],16
fclass.m.unc p8,p0 = f8, 0xc3 //@qnan |@snan
}
;;
// y0 = reciprocal square root approximation of t.
{ .mmf
ldfe acos_coeff_P5 = [ASIN_Addr1],16
ldfe acos_coeff_P4 = [ASIN_Addr2],16
frsqrta.s1 acos_y0,p0 = acos_t
}
;;
// p9 = |x| > 1 (domain error).
{ .mfi
ldfe acos_coeff_P16 = [ASIN_Addr1],16
fcmp.gt.s1 p9,p0 = acos_abs_x,f1
nop.i 999
}
// NaN input: return f8*1+0 in double precision.
{ .mfb
ldfe acos_coeff_P15 = [ASIN_Addr2],16
(p8) fma.d f8 = f8,f1,f0
(p8) br.ret.spnt b0
}
;;
// p10 = x is zero.
{ .mmf
ldfe acos_coeff_P20 = [ASIN_Addr1],16
ldfe acos_coeff_P19 = [ASIN_Addr2],16
fclass.m.unc p10,p0 = f8, 0x07 //@zero
}
;;
// On a domain error the tag 58 is placed in the output register that
// __libm_error_region passes on to __libm_error_support.
{ .mfi
ldfe acos_coeff_P9 = [ASIN_Addr1],16
fma.s1 acos_t4 = acos_t2,acos_t2,f0
(p9) mov GR_Parameter_Tag = 58
}
// 3/2 = 1/2 * 1 + 1
{ .mfi
ldfe acos_coeff_P8 = [ASIN_Addr2],16
fma.s1 acos_3by2 = acos_1by2,f1,f1
nop.i 999;;
}
{ .mfi
ldfe acos_coeff_P2 = [ASIN_Addr2],16
fma.s1 acos_tx4 = acos_tx2,acos_tx2,f0
nop.i 999
}
// |x| > 1: leave for the error path.
{ .mfb
ldfe acos_coeff_P3 = [ASIN_Addr1],16
fma.s1 acos_t3 = acos_t,acos_t2,f0
(p9) br.cond.spnt __libm_error_region
}
;;
// H0 = y0/2 and S0 = y0*t, the starting values derived from frsqrta.
{ .mfi
ldfe acos_coeff_P13 = [ASIN_Addr2],16
fma.s1 acos_H0 = acos_y0,acos_1by2,f0
nop.i 999
}
{ .mfi
ldfe acos_coeff_P14 = [ASIN_Addr1],16
fma.s1 acos_S0 = acos_y0,acos_t,f0
nop.i 999;;
}
// p6 = |x| == 1.
{ .mfi
ldfe acos_coeff_P11 = [ASIN_Addr2],16
fcmp.eq.s1 p6,p0 = acos_abs_x, f1
nop.i 999
}
{ .mfi
ldfe acos_coeff_P12 = [ASIN_Addr1],16
fma.s1 acos_tx3 = acos_tx,acos_tx2,f0
nop.i 999
}
;;
// First level of both polynomials: adjacent coefficients are paired as
// (variable * P(k+1) + P(k)), once with tx (acos_1poly_*) and once with t
// (acos_poly_*).
{ .mfi
ldfe acos_coeff_P10 = [ASIN_Addr2],16
fma.s1 acos_1poly_p6 = acos_tx,acos_coeff_P7,acos_coeff_P6
nop.i 999
}
{ .mfi
ldfe acos_coeff_P1 = [ASIN_Addr1],16
fma.s1 acos_poly_p6 = acos_t,acos_coeff_P7,acos_coeff_P6
nop.i 999;;
}
// 5/2 = 3/2 + 1
{ .mfi
ldfe acos_const_sqrt2by2 = [ASIN_Addr2],16
fma.s1 acos_5by2 = acos_3by2,f1,f1
nop.i 999
}
// 11/4 = (3/2)*(3/2) + 1/2
{ .mfi
ldfe acos_coeff_P21 = [ASIN_Addr1],16
fma.s1 acos_11by4 = acos_3by2,acos_3by2,acos_1by2
nop.i 999;;
}
// Last load: pi/2.  It is issued before either short-path branch below, so
// both short paths may use acos_const_piby2.
{ .mfi
ldfe acos_const_piby2 = [ASIN_Addr1],16
fma.s1 acos_poly_p17 = acos_t,acos_coeff_P18,acos_coeff_P17
nop.i 999
}
// 3/4 = (3/2)*(1/2)
{ .mfb
nop.m 999
fma.s1 acos_3by4 = acos_3by2,acos_1by2,f0
(p10) br.cond.spnt L(ACOS_ZERO) // Branch to short path if x=0
}
;;
{ .mfi
nop.m 999
fma.s1 acos_poly_p15 = acos_t,acos_coeff_P16,acos_coeff_P15
nop.i 999
}
// d = 1/2 - S0*H0
{ .mfb
nop.m 999
fnma.s1 acos_d = acos_S0,acos_H0,acos_1by2
(p6) br.cond.spnt L(ACOS_ABS_ONE) // Branch to short path if |x|=1
}
;;
// General path from here on: the remaining work is pure floating-point
// arithmetic, with the two polynomial evaluations, the run-time constants
// and the acos_z computation interleaved.
{ .mfi
nop.m 999
fma.s1 acos_poly_p19 = acos_t,acos_coeff_P20,acos_coeff_P19
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p4 = acos_t,acos_coeff_P5,acos_coeff_P4
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p17 = acos_tx,acos_coeff_P18,acos_coeff_P17
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p8 = acos_t,acos_coeff_P9,acos_coeff_P8
nop.i 999;;
}
// 35/8 = (5/2)*(11/4) - 5/2
{ .mfi
nop.m 999
fms.s1 acos_35by8 = acos_5by2,acos_11by4,acos_5by2
nop.i 999
}
// 63/8 = (5/2)*(11/4) + 1
{ .mfi
nop.m 999
fma.s1 acos_63by8 = acos_5by2,acos_11by4,f1
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p13 = acos_t,acos_coeff_P14,acos_coeff_P13
nop.i 999
}
// 18/4 = (3/2)*(5/2) + 3/4
{ .mfi
nop.m 999
fma.s1 acos_18by4 = acos_3by2,acos_5by2,acos_3by4
nop.i 999;;
}
// l1 = (5/2)*d + 3/2 and d2 = d*d
{ .mfi
nop.m 999
fma.s1 acos_l1 = acos_5by2,acos_d,acos_3by2
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_d2 = acos_d,acos_d,f0
nop.i 999;;
}
// Second level of the t polynomial: pairs are combined with t2.
{ .mfi
nop.m 999
fma.s1 acos_poly_p15 = acos_t2,acos_poly_p17,acos_poly_p15
nop.i 999
}
// T0 = d*S0
{ .mfi
nop.m 999
fma.s1 acos_T0 = acos_d,acos_S0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p19 = acos_t2,acos_coeff_P21,acos_poly_p19
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p4 = acos_t2,acos_poly_p6,acos_poly_p4
nop.i 999;;
}
// d1 = (35/8)*d
{ .mfi
nop.m 999
fma.s1 acos_d1 = acos_35by8,acos_d,f0
nop.i 999
}
// 231/16 = (3/2)*(35/8) + 63/8
{ .mfi
nop.m 999
fma.s1 acos_231by16 = acos_3by2,acos_35by8,acos_63by8
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p2 = acos_t,acos_coeff_P3,acos_coeff_P2
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p8 = acos_t2,acos_coeff_P10,acos_poly_p8
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p11 = acos_t,acos_coeff_P12,acos_coeff_P11
nop.i 999
}
// e0 = d2*l1 + d
{ .mfi
nop.m 999
fma.s1 acos_e0 = acos_d2,acos_l1,acos_d
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p15 = acos_tx,acos_coeff_P16,acos_coeff_P15
nop.i 999
}
// The constant term of both polynomials is 1 (f1).
{ .mfi
nop.m 999
fma.s1 acos_poly_p0 = acos_t,acos_coeff_P1,f1
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p19 = acos_tx,acos_coeff_P20,acos_coeff_P19
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p4 = acos_tx,acos_coeff_P5,acos_coeff_P4
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p8 = acos_tx,acos_coeff_P9,acos_coeff_P8
nop.i 999
}
// l2 = (231/16)*d + 63/8
{ .mfi
nop.m 999
fma.s1 acos_l2 = acos_231by16,acos_d,acos_63by8
nop.i 999;;
}
// d3 = d^3 and T3 = d2*T0
{ .mfi
nop.m 999
fma.s1 acos_d3 = acos_d2,acos_d,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_T3 = acos_d2,acos_T0,f0
nop.i 999;;
}
// 429/16 = (18/4)*(11/4) + 231/16
{ .mfi
nop.m 999
fma.s1 acos_429by16 = acos_18by4,acos_11by4,acos_231by16
nop.i 999
}
// S1 = e0*S0 + S0
{ .mfi
nop.m 999
fma.s1 acos_S1 = acos_e0,acos_S0,acos_S0
nop.i 999;;
}
// Third level of the t polynomial: groups are combined with t4.
{ .mfi
nop.m 999
fma.s1 acos_poly_p4 = acos_t4,acos_poly_p8,acos_poly_p4
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p15 = acos_t4,acos_poly_p19,acos_poly_p15
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p0 = acos_t2,acos_poly_p2,acos_poly_p0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p11 = acos_t2,acos_poly_p13,acos_poly_p11
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_t8 = acos_t4,acos_t4,f0
nop.i 999
}
// e1 = d2*l2 + d1
{ .mfi
nop.m 999
fma.s1 acos_e1 = acos_d2,acos_l2,acos_d1
nop.i 999;;
}
// Second level of the tx polynomial: pairs are combined with tx2.
{ .mfi
nop.m 999
fma.s1 acos_1poly_p4 = acos_tx2,acos_1poly_p6,acos_1poly_p4
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p15 = acos_tx2,acos_1poly_p17,acos_1poly_p15
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p8 = acos_tx2,acos_coeff_P10,acos_1poly_p8
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p19 = acos_tx2,acos_coeff_P21,acos_1poly_p19
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p2 = acos_tx,acos_coeff_P3,acos_coeff_P2
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p13 = acos_tx,acos_coeff_P14,acos_coeff_P13
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p0 = acos_tx,acos_coeff_P1,f1
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p11 = acos_tx,acos_coeff_P12,acos_coeff_P11
nop.i 999;;
}
// l3 = (429/16)*d
{ .mfi
nop.m 999
fma.s1 acos_l3 = acos_429by16,acos_d,f0
nop.i 999
}
// z = e1*T3 + S1 (one more term is added further down)
{ .mfi
nop.m 999
fma.s1 acos_z = acos_e1,acos_T3,acos_S1
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p11 = acos_t4,acos_poly_p15,acos_poly_p11
nop.i 999
}
// T6 = T3*d3
{ .mfi
nop.m 999
fma.s1 acos_T6 = acos_T3,acos_d3,f0
nop.i 999;;
}
// t11 = t8*t3
{ .mfi
nop.m 999
fma.s1 acos_t11 = acos_t8,acos_t3,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_poly_p0 = acos_t4,acos_poly_p4,acos_poly_p0
nop.i 999;;
}
// Third level of the tx polynomial: groups are combined with tx4.
{ .mfi
nop.m 999
fma.s1 acos_1poly_p4 = acos_tx4,acos_1poly_p8,acos_1poly_p4
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p15 = acos_tx4,acos_1poly_p19,acos_1poly_p15
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p0 = acos_tx2,acos_1poly_p2,acos_1poly_p0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p11 = acos_tx2,acos_1poly_p13,acos_1poly_p11
nop.i 999;;
}
// Range selection: p7 = |x| <= sqrt(2)/2, p8 = the complement.  This
// overwrites the earlier use of p8 as the NaN predicate.
{ .mfi
nop.m 999
// fcmp.le.s1 acos_pred_LEsqrt2by2,acos_pred_GTsqrt2by2 = acos_abs_x,acos_const_sqrt2by2
fcmp.le.s1 p7,p8 = acos_abs_x,acos_const_sqrt2by2
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_tx8 = acos_tx4,acos_tx4,f0
nop.i 999;;
}
// z = l3*T6 + z (final value of z)
{ .mfi
nop.m 999
fma.s1 acos_z = acos_l3,acos_T6,acos_z
nop.i 999;;
}
// series_t = t11*poly_p11 + poly_p0: the complete polynomial in t.
{ .mfi
nop.m 999
fma.s1 acos_series_t = acos_t11,acos_poly_p11,acos_poly_p0
nop.i 999
}
// acos_const_add = pi (pi/2 + pi/2) when x < 0 ...
{ .mfi
nop.m 999
(p11) fma.s1 acos_const_add = acos_const_piby2, f1, acos_const_piby2
nop.i 999
}
;;
// ... and 0 otherwise.
{ .mfi
nop.m 999
(p12) fma.s1 acos_const_add = f1,f0,f0
nop.i 999
}
;;
{ .mfi
nop.m 999
fma.s1 acos_1poly_p0 = acos_tx4,acos_1poly_p4,acos_1poly_p0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acos_1poly_p11 = acos_tx4,acos_1poly_p15,acos_1poly_p11
nop.i 999;;
}
// tx11 = tx8*tx3
{ .mfi
nop.m 999
fma.s1 acos_tx11 = acos_tx8,acos_tx3,f0
nop.i 999;;
}
// |x| > sqrt(2)/2: answer2 = -(z * series_t)
{ .mfi
nop.m 999
//(acos_pred_GTsqrt2by2) fnma.s1 answer2 = acos_z,acos_series_t,acos_const_piby2
(p8) fnma.s1 answer2 = acos_z,acos_series_t,f0
nop.i 999;;
}
// series_tx = tx11*1poly_p11 + 1poly_p0: the complete polynomial in tx.
{ .mfi
nop.m 999
fma.s1 acos_series_tx = acos_tx11,acos_1poly_p11,acos_1poly_p0
nop.i 999;;
}
// |x| > sqrt(2)/2: result = acos_const_add - sgn(x)*answer2, rounded to double.
{ .mfi
nop.m 999
//(acos_pred_GTsqrt2by2) fnma.d f8 = acos_sgn_x,answer2,acos_const_piby2
(p8) fnma.d f8 = acos_sgn_x,answer2,acos_const_add
nop.i 999;;
}
// |x| <= sqrt(2)/2: result = pi/2 - x*series_tx, rounded to double; return.
{ .mfb
nop.m 999
//(acos_pred_LEsqrt2by2) fnma.d f8 = f8,acos_series_tx,acos_const_piby2
(p7) fnma.d f8 = f8,acos_series_tx,acos_const_piby2
br.ret.sptk b0 ;;
}
L(ACOS_ZERO):
// Here if x=0
// Short path reached through the (p10) branch in acos.  acos(0) = pi/2:
// the pi/2 constant loaded from acos_coeff_1_table is rounded to double
// (pi/2 * 1 + 0) and returned in f8.
{ .mfb
nop.m 999
fma.d f8 = acos_const_piby2,f1,f0
br.ret.sptk b0 ;;
}
L(ACOS_ABS_ONE):
// p11 (x < 0) and p12 come from the same fcmp in acos; the directive below
// declares them mutually exclusive to the assembler, so the two predicated
// writes to f8 do not conflict.
.pred.rel "mutex",p11,p12
// Here if |x|=1
// Short path reached through the (p6) branch in acos.
{ .mfi
nop.m 999
(p11) fma.d f8 = acos_const_piby2,f1,acos_const_piby2 // acos(-1)=pi
nop.i 999
}
{ .mfb
nop.m 999
(p12) fma.d f8 = f1,f0,f0 // acos(1)=0
br.ret.sptk b0 ;;
}
.endp acos
ASM_SIZE_DIRECTIVE(acos)
// __libm_error_region
// Out-of-line error path: builds a 64-byte stack frame, stores the
// argument (f8) and a default result (f9) in it, calls
// __libm_error_support, then returns the value found in the result slot.
// Frame layout relative to sp after the frame is created:
//   sp+16 : parameter 1 = f8, stored as a double
//   sp+32 : parameter 2 = f1, stored as a single (stfs)
//   sp+48 : parameter 3 / result slot = f9, stored as a double
// NOTE(review): the error tag register is not written here; presumably
// the code that branches here sets it first -- confirm against the caller.
.proc __libm_error_region
__libm_error_region:
.prologue
// This bundle has no stop after it, so GR_Parameter_Y is formed from the
// sp value that precedes the "add sp=-64,sp" in the next bundle.
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 999
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// The post-increment of 16 leaves GR_Parameter_Y pointing at new sp+48.
{ .mmi
stfs [GR_Parameter_Y] = f1,16 // Store Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
// f9 = frcpa of 0/0; it becomes the default result stored below.
// NOTE(review): presumably this yields a NaN and raises invalid -- confirm.
frcpa.s0 f9,p0 = f0,f0
;;
{ .mib
stfd [GR_Parameter_X] = f8 // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
// The post-decrement of 16 moves GR_Parameter_Y back to sp+32.
// r32 is loaded with the result-slot address (sp+48) and is read again
// after the call returns.
{ .mib
stfd [GR_Parameter_Y] = f9,-16 // Store Parameter 3 on stack
adds r32 = 48,sp
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
ldfd f8 = [r32] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// External error handler called from __libm_error_region above.
.type __libm_error_support,@function
.global __libm_error_support

693
sysdeps/ia64/fpu/e_acosf.S Normal file
View File

@ -0,0 +1,693 @@
.file "acosf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// History
//==============================================================
// 2/02/00 Initial revision
// 6/28/00 Improved speed
// 6/31/00 Changed register allocation because of some duplicate macros
// moved nan exit bundle up to gain a cycle.
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 8/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
// 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal.
// Description
//=========================================
// The acosf function computes the principal value of the arc cosine of x.
// A domain error occurs for arguments not in the range [-1,+1].
// The acosf function returns the arc cosine in the range [0, +pi] radians.
// acos(1) returns +0
// acos(x) returns a Nan and raises the invalid exception for |x| >1
// |x| <= sqrt(2)/2. get Ax and Bx
// poly_p1 = x p1
// poly_p3 = x2 p4 + p3
// poly_p1 = x2 (poly_p1) + x = x2(x p1) + x
// poly_p2 = x2( poly_p3) + p2 = x2(x2 p4 + p3) + p2
// poly_Ax = x5(x2( poly_p3) + p2) + x2(x p1) + x
// = x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x
// poly_p7 = x2 p8 + p7
// poly_p5 = x2 p6 + p5
// poly_p7 = x4 p9 + (x2 p8 + p7)
// poly_Bx = x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5
// sinf1 = x11(x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5) + x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x
// = x19 p9 + x17 p8 + x15 p7 + x13 p6 + x11 p5 + x9 p4 + x7 p3 + x5 p2 + x3 p1 + x
// answer1 = pi/2 - sinf1
// |x| > sqrt(2)/2
// Get z = sqrt(1-x2)
// Get polynomial in t = 1-x2
// t2 = t t
// t4 = t2 t2
// poly_p4 = t p5 + p4
// poly_p1 = t p1 + 1
// poly_p6 = t p7 + p6
// poly_p2 = t p3 + p2
// poly_p8 = t p9 + p8
// poly_p4 = t2 poly_p6 + poly_p4
// = t2 (t p7 + p6) + (t p5 + p4)
// poly_p2 = t2 poly_p2 + poly_p1
// = t2 (t p3 + p2) + (t p1 + 1)
// poly_p4 = t4 poly_p8 + poly_p4
// = t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4))
// P(t) = poly_p2 + t4 poly_p4
// = t2 (t p3 + p2) + (t p1 + 1) + t4 (t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4)))
// = t3 p3 + t2 p2 + t p1 + 1 + t9 p9 + t8 p8 + t7 p7 + t6 p6 + t5 p5 + t4 p4
// answer2 = sign(x) z P(t) if x>0
// = sign(x) z P(t) + pi if x<0
#include "libm_support.h"
//
// Assembly macros
//=========================================
// Symbolic register names used by acosf and its __libm_error_region.
// predicate registers
// (p7/p8 are written directly in the code; these two names are kept only
// as comments.)
//acosf_pred_LEsqrt2by2 = p7
//acosf_pred_GTsqrt2by2 = p8
// integer registers
// acosf issues "alloc r32 = ar.pfs,1,8,4,0": one input (r32), eight
// locals (r33-r40) and four outputs (r41-r44).
// Locals: table addresses and bit patterns for the single-precision
// constants 1/2, 3/2 and 5/2.
ACOSF_Addr1 = r33
ACOSF_Addr2 = r34
ACOSF_GR_1by2 = r35
ACOSF_GR_3by2 = r36
ACOSF_GR_5by2 = r37
// Locals used by __libm_error_region to save b0, ar.pfs and gp.
GR_SAVE_B0 = r38
GR_SAVE_PFS = r39
GR_SAVE_GP = r40
// Output registers used around the call to __libm_error_support.
GR_Parameter_X = r41
GR_Parameter_Y = r42
GR_Parameter_RESULT = r43
GR_Parameter_TAG = r44
// floating point registers
acosf_y = f32
acosf_abs_x = f33
// NOTE(review): acosf_x2 is assigned here (f34) and again further down
// (f48). Presumably the later assignment is the one in effect where
// acosf_x2 is used, leaving f34 unused -- confirm with the assembler.
acosf_x2 = f34
acosf_sgn_x = f35
acosf_1by2 = f36
acosf_3by2 = f37
acosf_5by2 = f38
// Polynomial coefficients loaded from the two data tables.
acosf_coeff_P3 = f39
acosf_coeff_P8 = f40
acosf_coeff_P1 = f41
acosf_coeff_P4 = f42
acosf_coeff_P5 = f43
acosf_coeff_P2 = f44
acosf_coeff_P7 = f45
acosf_coeff_P6 = f46
acosf_coeff_P9 = f47
// Powers of x and the two table constants.
acosf_x2 = f48
acosf_x3 = f49
acosf_x4 = f50
acosf_x8 = f51
acosf_x5 = f52
acosf_const_piby2 = f53
acosf_const_sqrt2by2 = f54
acosf_x11 = f55
// Intermediates of the polynomial in x (|x| <= sqrt(2)/2 range).
acosf_poly_p1 = f56
acosf_poly_p3 = f57
acosf_sinf1 = f58
acosf_poly_p2 = f59
acosf_poly_Ax = f60
acosf_poly_p7 = f61
acosf_poly_p5 = f62
acosf_sgnx_t4 = f63
acosf_poly_Bx = f64
// t = 1 - x*x and the intermediates that turn the frsqrta estimate
// (acosf_B) into acosf_z.
acosf_t = f65
acosf_yby2 = f66
acosf_B = f67
acosf_B2 = f68
acosf_Az = f69
acosf_dz = f70
acosf_Sz = f71
acosf_d2z = f72
acosf_Fz = f73
acosf_z = f74
acosf_sgnx_z = f75
// Intermediates of the polynomial in t (|x| > sqrt(2)/2 range).
acosf_t2 = f76
acosf_2poly_p4 = f77
acosf_2poly_p6 = f78
acosf_2poly_p1 = f79
acosf_2poly_p2 = f80
acosf_2poly_p8 = f81
acosf_t4 = f82
acosf_Pt = f83
acosf_sgnx_2poly_p2 = f84
acosf_sgn_x_piby2 = f85
acosf_poly_p7a = f86
acosf_2poly_p4a = f87
acosf_2poly_p4b = f88
acosf_2poly_p2a = f89
acosf_poly_p1a = f90
// Data tables
//==============================================================
// Each data8 entry is one 8-byte value. acosf reads them as doubles, two
// at a time with ldfpd (post-increment 16), so the order of the entries
// must match the order of the loads:
//   table 1: (P1,P4) (P7,P6) (P9,sqrt(2)/2)
//   table 2: (P3,P8) (P5,P2), then pi/2 with a single ldfd
// The section is .rodata when _LIBC is defined and .data otherwise.
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
acosf_coeff_1_table:
ASM_TYPE_DIRECTIVE(acosf_coeff_1_table,@object)
data8 0x3FC5555607DCF816 // P1
data8 0x3F9CF81AD9BAB2C6 // P4
data8 0x3FC59E0975074DF3 // P7
data8 0xBFA6F4CC2780AA1D // P6
data8 0x3FC2DD45292E93CB // P9
data8 0x3fe6a09e667f3bcd // sqrt(2)/2
ASM_SIZE_DIRECTIVE(acosf_coeff_1_table)
acosf_coeff_2_table:
ASM_TYPE_DIRECTIVE(acosf_coeff_2_table,@object)
data8 0x3FA6F108E31EFBA6 // P3
data8 0xBFCA31BF175D82A0 // P8
data8 0x3FA30C0337F6418B // P5
data8 0x3FB332C9266CB1F9 // P2
data8 0x3ff921fb54442d18 // pi_by_2
ASM_SIZE_DIRECTIVE(acosf_coeff_2_table)
// acosf
// Argument and result are both in f8; the final fma.s/fms.s rounds the
// result to single precision.
// Exits visible in this procedure:
//   x is NaN          -> returns x*1+0
//   x is zero         -> L(ACOSF_ZERO): returns pi/2
//   |x| == 1          -> L(ACOSF_ABS_ONE): pi when x<0 (p11), else 0 (p12)
//   |x| >  1          -> __libm_error_region with GR_Parameter_TAG = 59
//   |x| <= sqrt(2)/2  -> (p7) pi/2 - sinf1
//   |x| >  sqrt(2)/2  -> (p8) z*Pt + sgn_x_piby2
// The early steps of both ranges are issued unpredicated; the later steps
// are predicated on p7/p8, which are set by the fcmp.le against
// sqrt(2)/2. Note that p8 is first used for the NaN test and is then
// redefined by that fcmp.le.
.align 32
.global acosf
ASM_TYPE_DIRECTIVE(acosf,@function)
.section .text
.proc acosf
.align 32
acosf:
// Load the addresses of the two tables.
// Then, load the coefficients and other constants.
// acosf_t = 1 - x*x (fnma computes -(a*b)+c).
{ .mfi
alloc r32 = ar.pfs,1,8,4,0
fnma.s1 acosf_t = f8,f8,f1
dep.z ACOSF_GR_1by2 = 0x3f,24,8 // 0x3f000000
}
// acosf_x2 = x*x.
{ .mfi
addl ACOSF_Addr1 = @ltoff(acosf_coeff_1_table),gp
fma.s1 acosf_x2 = f8,f8,f0
addl ACOSF_Addr2 = @ltoff(acosf_coeff_2_table),gp ;;
}
// acosf_abs_x = |x|: sign taken from f1, remaining fields from x.
{ .mfi
ld8 ACOSF_Addr1 = [ACOSF_Addr1]
fmerge.s acosf_abs_x = f1,f8
dep ACOSF_GR_3by2 = -1,r0,22,8 // 0x3fc00000
}
{ .mlx
nop.m 999
movl ACOSF_GR_5by2 = 0x40200000;;
}
// acosf_sgn_x = 1.0 carrying the sign of x.
{ .mfi
setf.s acosf_1by2 = ACOSF_GR_1by2
fmerge.s acosf_sgn_x = f8,f1
nop.i 999
}
{ .mfi
ld8 ACOSF_Addr2 = [ACOSF_Addr2]
nop.f 0
nop.i 999;;
}
// p11 = (x < 0), p12 = its complement; both are consumed at
// L(ACOSF_ABS_ONE).
{ .mfi
setf.s acosf_5by2 = ACOSF_GR_5by2
fcmp.lt.s1 p11,p12 = f8,f0
nop.i 999;;
}
// p8 = x is a NaN.
{ .mmf
ldfpd acosf_coeff_P1,acosf_coeff_P4 = [ACOSF_Addr1],16
setf.s acosf_3by2 = ACOSF_GR_3by2
fclass.m.unc p8,p0 = f8, 0xc3 ;; //@qnan | @snan
}
// Powers t2 = t*t and x4 = x2*x2, overlapped with the coefficient loads.
{ .mfi
ldfpd acosf_coeff_P7,acosf_coeff_P6 = [ACOSF_Addr1],16
fma.s1 acosf_t2 = acosf_t,acosf_t,f0
nop.i 999
}
{ .mfi
ldfpd acosf_coeff_P3,acosf_coeff_P8 = [ACOSF_Addr2],16
fma.s1 acosf_x4 = acosf_x2,acosf_x2,f0
nop.i 999;;
}
// p10 = x is zero.
{ .mfi
ldfpd acosf_coeff_P9,acosf_const_sqrt2by2 = [ACOSF_Addr1]
fclass.m.unc p10,p0 = f8, 0x07 //@zero
nop.i 999
}
{ .mfi
ldfpd acosf_coeff_P5,acosf_coeff_P2 = [ACOSF_Addr2],16
fma.s1 acosf_x3 = f8,acosf_x2,f0
nop.i 999;;
}
// acosf_B = reciprocal square root estimate of t.
{ .mfi
ldfd acosf_const_piby2 = [ACOSF_Addr2]
frsqrta.s1 acosf_B,p0 = acosf_t
nop.i 999
}
{ .mfb
nop.m 999
(p8) fma.s f8 = f8,f1,f0
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
// p6 = (|x| == 1); the branch that uses it comes two groups later.
{ .mfb
nop.m 999
fcmp.eq.s1 p6,p0 = acosf_abs_x,f1
(p10) br.cond.spnt L(ACOSF_ZERO) ;; // Branch if x=0
}
// p9 = (|x| > 1).
{ .mfi
nop.m 999
fcmp.gt.s1 p9,p0 = acosf_abs_x,f1
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acosf_x8 = acosf_x4,acosf_x4,f0
nop.i 999
}
{ .mfb
nop.m 999
fma.s1 acosf_t4 = acosf_t2,acosf_t2,f0
(p6) br.cond.spnt L(ACOSF_ABS_ONE) ;; // Branch if |x|=1
}
{ .mfi
nop.m 999
fma.s1 acosf_x5 = acosf_x2,acosf_x3,f0
nop.i 999
}
// The error tag is set in the same bundle as the branch to the error path.
{ .mfb
(p9) mov GR_Parameter_TAG = 59
fma.s1 acosf_yby2 = acosf_t,acosf_1by2,f0
(p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1
}
// Start of the refinement of the frsqrta estimate:
//   Az = t*B, B2 = B*B, dz = 1/2 - B2*(t/2)
{ .mfi
nop.m 999
fma.s1 acosf_Az = acosf_t,acosf_B,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acosf_B2 = acosf_B,acosf_B,f0
nop.i 999;;
}
// First-level polynomial terms. Names with the "2poly" prefix are terms
// in t; the plain "poly" names are terms in x.
{ .mfi
nop.m 999
fma.s1 acosf_poly_p1 = f8,acosf_coeff_P1,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acosf_2poly_p1 = acosf_coeff_P1,acosf_t,f1
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acosf_poly_p3 = acosf_coeff_P4,acosf_x2,acosf_coeff_P3
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acosf_2poly_p6 = acosf_coeff_P7,acosf_t,acosf_coeff_P6
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acosf_poly_p7 = acosf_x2,acosf_coeff_P8,acosf_coeff_P7
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acosf_2poly_p2 = acosf_coeff_P3,acosf_t,acosf_coeff_P2
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acosf_poly_p5 = acosf_x2,acosf_coeff_P6,acosf_coeff_P5
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acosf_2poly_p4 = acosf_coeff_P5,acosf_t,acosf_coeff_P4
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acosf_x11 = acosf_x8,acosf_x3,f0
nop.i 999
}
{ .mfi
nop.m 999
fnma.s1 acosf_dz = acosf_B2,acosf_yby2,acosf_1by2
nop.i 999;;
}
// poly_p1a = x2*(x*P1) + x
{ .mfi
nop.m 999
fma.s1 acosf_poly_p1a = acosf_x2,acosf_poly_p1,f8
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acosf_2poly_p8 = acosf_coeff_P9,acosf_t,acosf_coeff_P8
nop.i 999;;
}
// Get the absolute value of x and determine the region in which x lies
// p7 = (|x| <= sqrt(2)/2), p8 = complement. This overwrites the earlier
// NaN meaning of p8.
{ .mfi
nop.m 999
fcmp.le.s1 p7,p8 = acosf_abs_x,acosf_const_sqrt2by2
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acosf_poly_p2 = acosf_x2,acosf_poly_p3,acosf_coeff_P2
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 acosf_poly_p7a = acosf_x4,acosf_coeff_P9,acosf_poly_p7
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 acosf_2poly_p2a = acosf_2poly_p2,acosf_t2,acosf_2poly_p1
nop.i 999;;
}
// From here on most instructions are predicated on the range.
{ .mfi
nop.m 999
(p8) fma.s1 acosf_sgnx_t4 = acosf_sgn_x,acosf_t4,f0
nop.i 999
}
{ .mfi
nop.m 999
(p8) fma.s1 acosf_2poly_p4a = acosf_2poly_p6,acosf_t2,acosf_2poly_p4
nop.i 999;;
}
// Sz = 5/2*dz + 3/2 and d2z = dz*dz
{ .mfi
nop.m 999
(p8) fma.s1 acosf_Sz = acosf_5by2,acosf_dz,acosf_3by2
nop.i 999
}
{ .mfi
nop.m 999
(p8) fma.s1 acosf_d2z = acosf_dz,acosf_dz,f0
nop.i 999;;
}
// sgn_x_piby2 = pi/2 - sgn_x*pi/2, i.e. 0 for x>0 and pi for x<0.
{ .mfi
nop.m 999
(p8) fnma.d.s1 acosf_sgn_x_piby2 = acosf_sgn_x,acosf_const_piby2,acosf_const_piby2
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 acosf_poly_Ax = acosf_x5,acosf_poly_p2,acosf_poly_p1a
nop.i 999;;
}
{ .mfi
nop.m 999
(p7) fma.s1 acosf_poly_Bx = acosf_x4,acosf_poly_p7a,acosf_poly_p5
nop.i 999
}
{ .mfi
nop.m 999
(p8) fma.s1 acosf_sgnx_2poly_p2 = acosf_sgn_x,acosf_2poly_p2a,f0
nop.i 999;;
}
// The compare result p6 is not read again in this procedure; the compare
// uses status field s0 so that it can raise the denormal flag.
{ .mfi
nop.m 999
fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal
nop.i 999
}
{ .mfi
nop.m 999
(p8) fma.s1 acosf_2poly_p4b = acosf_2poly_p8,acosf_t4,acosf_2poly_p4a
nop.i 999;;
}
// Fz = d2z*Sz + dz
{ .mfi
nop.m 999
(p8) fma.s1 acosf_Fz = acosf_d2z,acosf_Sz,acosf_dz
nop.i 999;;
}
// Pt = 2poly_p4b*(sgn_x*t4) + sgn_x*2poly_p2a
{ .mfi
nop.m 999
(p8) fma.d.s1 acosf_Pt = acosf_2poly_p4b,acosf_sgnx_t4,acosf_sgnx_2poly_p2
nop.i 999;;
}
// z = Az*Fz + Az (the file header describes z as sqrt(1-x2)).
{ .mfi
nop.m 999
(p8) fma.d.s1 acosf_z = acosf_Az,acosf_Fz,acosf_Az
nop.i 999 ;;
}
// sinf1 = x11*poly_Bx + poly_Ax
{ .mfi
nop.m 999
(p7) fma.d.s1 acosf_sinf1 = acosf_x11,acosf_poly_Bx,acosf_poly_Ax
nop.i 999;;
}
// Final result; exactly one of the two predicated writes to f8 executes.
.pred.rel "mutex",p8,p7 //acosf_pred_GTsqrt2by2,acosf_pred_LEsqrt2by2
{ .mfi
nop.m 999
(p8) fma.s f8 = acosf_z,acosf_Pt,acosf_sgn_x_piby2
nop.i 999
}
{ .mfb
nop.m 999
(p7) fms.s f8 = acosf_const_piby2,f1,acosf_sinf1
br.ret.sptk b0 ;;
}
L(ACOSF_ZERO):
// Here if x=0
{ .mfb
nop.m 999
fma.s f8 = acosf_const_piby2,f1,f0 // acosf(0)=pi/2
br.ret.sptk b0 ;;
}
L(ACOSF_ABS_ONE):
.pred.rel "mutex",p11,p12
// Here if |x|=1
// p11/p12 still hold the result of the earlier fcmp.lt of x against 0.
{ .mfi
nop.m 999
(p11) fma.s f8 = acosf_const_piby2,f1,acosf_const_piby2 // acosf(-1)=pi
nop.i 999
}
{ .mfb
nop.m 999
(p12) fma.s f8 = f1,f0,f0 // acosf(1)=0
br.ret.sptk b0 ;;
}
.endp acosf
ASM_SIZE_DIRECTIVE(acosf)
// Stack operations when calling error support.
// (1) (2)
// sp -> + psp -> +
// | |
// | | <- GR_Y
// | |
// | <-GR_Y Y2->|
// | |
// | | <- GR_X
// | |
// sp-64 -> + sp -> +
// save ar.pfs save b0
// save gp
// Stack operations when calling error support.
// (3) (call) (4)
// psp -> + sp -> +
// | |
// R3 ->| <- GR_RESULT | -> f8
// | |
// Y2 ->| <- GR_Y |
// | |
// X1 ->| |
// | |
// sp -> + +
// restore gp
// restore ar.pfs
// __libm_error_region
// Error path entered from acosf when |x| > 1 (GR_Parameter_TAG has already
// been set to 59 there). Builds a 64-byte frame, stores the argument and a
// default result as singles, calls __libm_error_support, then returns the
// value found in the result slot.
// Frame layout relative to sp after the frame is created:
//   sp+16 : parameter 1 = f8
//   sp+32 : parameter 2 = f1
//   sp+48 : parameter 3 / result slot = f9
.proc __libm_error_region
__libm_error_region:
.prologue
// This bundle has no stop after it, so GR_Parameter_Y is formed from the
// sp value that precedes the "add sp=-64,sp" in the next bundle.
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 999
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// The post-increment of 16 leaves GR_Parameter_Y pointing at new sp+48.
{ .mmi
stfs [GR_Parameter_Y] = f1,16 // Store Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
// f9 = frcpa of 0/0; it becomes the default result stored below.
// NOTE(review): presumably this yields the NaN and invalid exception that
// the file header documents for |x| > 1 -- confirm.
{ .mfi
nop.m 0
frcpa.s0 f9,p0 = f0,f0
nop.i 0
};;
{ .mib
stfs [GR_Parameter_X] = f8 // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
// After the store, GR_Parameter_Y is moved back to sp+32 (parameter 2).
{ .mib
stfs [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// GR_Parameter_RESULT is recomputed after the call (see the 8/15/00
// history entry: it was previously overwritten).
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// External error handler called from __libm_error_region above.
.type __libm_error_support#,@function
.global __libm_error_support#

1094
sysdeps/ia64/fpu/e_acosl.S Normal file

File diff suppressed because it is too large Load Diff

884
sysdeps/ia64/fpu/e_asin.S Normal file
View File

@ -0,0 +1,884 @@
.file "asin.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// History
//==============================================================
// 2/02/00 Initial version
// 8/17/00 New and much faster algorithm.
// 8/31/00 Avoided bank conflicts on loads, shortened |x|=1 path,
// fixed mfb split issue stalls.
// 12/19/00 Fixed small arg cases to force inexact, or inexact and underflow.
// Description
//=========================================
// The asin function computes the principal value of the arc sine of x.
// asin(0) returns 0, asin(1) returns pi/2, asin(-1) returns -pi/2.
// A domain error occurs for arguments not in the range [-1,+1].
// The asin function returns the arc sine in the range [-pi/2, +pi/2] radians.
#include "libm_support.h"
//
// Assembly macros
//=========================================
// Symbolic register names used by asin and its __libm_error_region.
// predicate registers
// (p7/p8 are written directly in the code; these two names are kept only
// as comments.)
//asin_pred_LEsqrt2by2 = p7
//asin_pred_GTsqrt2by2 = p8
// integer registers
// asin issues "alloc r32 = ar.pfs,1,6,4,0": one input (r32), six locals
// (r33-r38) and four outputs (r39-r42).
ASIN_Addr1 = r33
ASIN_Addr2 = r34
ASIN_FFFE = r35
// ASIN_lnorm_sig/ASIN_snorm_exp name the same registers (r36/r37) as
// GR_SAVE_B0/GR_SAVE_PFS. In asin the first pair is written only after
// the |x|>1 branch to __libm_error_region; the second pair is written
// only inside __libm_error_region.
ASIN_lnorm_sig = r36
ASIN_snorm_exp = r37
GR_SAVE_B0 = r36
GR_SAVE_PFS = r37
GR_SAVE_GP = r38
// Output registers used around the call to __libm_error_support.
GR_Parameter_X = r39
GR_Parameter_Y = r40
GR_Parameter_RESULT = r41
GR_Parameter_Tag = r42
// floating point registers
// Polynomial coefficients P1..P21 loaded from the two data tables.
asin_coeff_P1 = f32
asin_coeff_P2 = f33
asin_coeff_P3 = f34
asin_coeff_P4 = f35
asin_coeff_P5 = f36
asin_coeff_P6 = f37
asin_coeff_P7 = f38
asin_coeff_P8 = f39
asin_coeff_P9 = f40
asin_coeff_P10 = f41
asin_coeff_P11 = f42
asin_coeff_P12 = f43
asin_coeff_P13 = f44
asin_coeff_P14 = f45
asin_coeff_P15 = f46
asin_coeff_P16 = f47
asin_coeff_P17 = f48
asin_coeff_P18 = f49
asin_coeff_P19 = f50
asin_coeff_P20 = f51
asin_coeff_P21 = f52
asin_const_sqrt2by2 = f53
asin_const_piby2 = f54
asin_abs_x = f55
// tx = x*x and its powers, with the partial sums of the polynomial in tx
// ("1poly" names).
asin_tx = f56
asin_tx2 = f57
asin_tx3 = f58
asin_tx4 = f59
asin_tx8 = f60
asin_tx11 = f61
asin_1poly_p8 = f62
asin_1poly_p19 = f63
asin_1poly_p4 = f64
asin_1poly_p15 = f65
asin_1poly_p6 = f66
asin_1poly_p17 = f67
asin_1poly_p0 = f68
asin_1poly_p11 = f69
asin_1poly_p2 = f70
asin_1poly_p13 = f71
asin_series_tx = f72
// t = 1 - x*x and its powers, with the partial sums of the polynomial in t
// ("poly" names).
asin_t = f73
asin_t2 = f74
asin_t3 = f75
asin_t4 = f76
asin_t8 = f77
asin_t11 = f78
asin_poly_p8 = f79
asin_poly_p19 = f80
asin_poly_p4 = f81
asin_poly_p15 = f82
asin_poly_p6 = f83
asin_poly_p17 = f84
asin_poly_p0 = f85
asin_poly_p11 = f86
asin_poly_p2 = f87
asin_poly_p13 = f88
asin_series_t = f89
// Small rational constants; asin builds them arithmetically from
// asin_1by2 rather than loading them.
asin_1by2 = f90
asin_3by2 = f91
asin_5by2 = f92
asin_11by4 = f93
asin_35by8 = f94
asin_63by8 = f95
asin_231by16 = f96
// Intermediates that turn the frsqrta estimate (asin_y0) into asin_z.
asin_y0 = f97
asin_H0 = f98
asin_S0 = f99
asin_d = f100
asin_l1 = f101
asin_d2 = f102
asin_T0 = f103
asin_d1 = f104
asin_e0 = f105
asin_l2 = f106
asin_d3 = f107
asin_T3 = f108
asin_S1 = f109
asin_e1 = f110
asin_z = f111
answer2 = f112
asin_sgn_x = f113
asin_429by16 = f114
asin_18by4 = f115
asin_3by4 = f116
asin_l3 = f117
asin_T6 = f118
// asin_eps_sig and asin_eps both name f120: the fmerge.se in asin reads
// the significand from f120 and writes the merged value back to f120.
asin_eps_exp = f119
asin_eps_sig = f120
asin_eps = f120
// Data tables
//==============================================================
// Each entry is 16 bytes: a 64-bit significand followed by a 64-bit word
// holding the sign/exponent. asin reads one entry per ldfe with a
// post-increment of 16, alternating between the two tables, so the order
// of the entries must match the order of the loads:
//   table 1: P7 P18 P5 P16 P20 P9 P3 P14 P12 P1 P21 pi/2
//   table 2: P6 P17 P4 P15 P19 P8 P2 P13 P11 P10 sqrt(2)/2
// The section is .rodata when _LIBC is defined and .data otherwise.
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
asin_coeff_1_table:
ASM_TYPE_DIRECTIVE(asin_coeff_1_table,@object)
data8 0xE4E7E0A423A21249 , 0x00003FF8 //P7
data8 0xC2F7EE0200FCE2A5 , 0x0000C003 //P18
data8 0xB745D7F6C65C20E0 , 0x00003FF9 //P5
data8 0xF75E381A323D4D94 , 0x0000C002 //P16
data8 0x8959C2629C1024C0 , 0x0000C002 //P20
data8 0xAFF68E7D241292C5 , 0x00003FF8 //P9
data8 0xB6DB6DB7260AC30D , 0x00003FFA //P3
data8 0xD0417CE2B41CB7BF , 0x0000C000 //P14
data8 0x81D570FEA724E3E4 , 0x0000BFFD //P12
data8 0xAAAAAAAAAAAAC277 , 0x00003FFC //P1
data8 0xF534912FF3E7B76F , 0x00003FFF //P21
data8 0xc90fdaa22168c235 , 0x00003fff // pi/2
data8 0x0000000000000000 , 0x00000000 // pad to avoid data bank conflict
ASM_SIZE_DIRECTIVE(asin_coeff_1_table)
asin_coeff_2_table:
ASM_TYPE_DIRECTIVE(asin_coeff_2_table,@object)
data8 0x8E26AF5F29B39A2A , 0x00003FF9 //P6
data8 0xB4F118A4B1015470 , 0x00004003 //P17
data8 0xF8E38E10C25990E0 , 0x00003FF9 //P4
data8 0x80F50489AEF1CAC6 , 0x00004002 //P15
data8 0x92728015172CFE1C , 0x00004003 //P19
data8 0xBBC3D831D4595971 , 0x00003FF8 //P8
data8 0x999999999952A5C3 , 0x00003FFB //P2
data8 0x855576BE6F0975EC , 0x00003FFF //P13
data8 0xF12420E778077D89 , 0x00003FFA //P11
data8 0xB6590FF4D23DE003 , 0x00003FF3 //P10
data8 0xb504f333f9de6484 , 0x00003ffe // sqrt(2)/2
ASM_SIZE_DIRECTIVE(asin_coeff_2_table)
// asin
// Argument and result are both in f8; the final fma.d rounds the result
// to double precision.
// Exits visible in this procedure:
//   x is NaN          -> returns x*1+0
//   x is zero         -> returns with f8 unchanged
//   |x| >  1          -> __libm_error_region with GR_Parameter_Tag = 61
//   |x| == 1          -> L(ASIN_ABS_1): sgn_x * pi/2
//   |x| <= sqrt(2)/2  -> (p7) x * series_tx
//   |x| >  sqrt(2)/2  -> (p8) sgn_x * (pi/2 - z*series_t)
// The same coefficients P1..P21 are used for two polynomials: one in
// tx = x*x ("1poly" names) and one in t = 1 - x*x ("poly" names). Both are
// evaluated unpredicated; only the final combination is predicated on
// p7/p8. Note that p8 is used first for the NaN test, then for the zero
// test, and is finally redefined by the fcmp.le against sqrt(2)/2.
.align 32
.global asin
.section .text
.proc asin
.align 32
asin:
// tx = x*x
{ .mfi
alloc r32 = ar.pfs,1,6,4,0
fma.s1 asin_tx = f8,f8,f0
addl ASIN_Addr2 = @ltoff(asin_coeff_2_table),gp
}
// t = 1 - x*x (fnma computes -(a*b)+c)
{ .mfi
mov ASIN_FFFE = 0xFFFE
fnma.s1 asin_t = f8,f8,f1
addl ASIN_Addr1 = @ltoff(asin_coeff_1_table),gp
}
;;
// asin_1by2 is formed from the exponent 0xFFFE; asin_abs_x = |x| (sign
// taken from f1, remaining fields from x).
{ .mfi
setf.exp asin_1by2 = ASIN_FFFE
fmerge.s asin_abs_x = f1,f8
nop.i 999 ;;
}
// asin_sgn_x = 1.0 carrying the sign of x.
{ .mmf
ld8 ASIN_Addr1 = [ASIN_Addr1]
ld8 ASIN_Addr2 = [ASIN_Addr2]
fmerge.s asin_sgn_x = f8,f1 ;;
}
// Coefficient loads (one ldfe per table per group) are overlapped with
// the first floating-point work from here down to the |x|=1 branch.
{ .mfi
ldfe asin_coeff_P7 = [ASIN_Addr1],16
fma.s1 asin_tx2 = asin_tx,asin_tx,f0
nop.i 999
}
{ .mfi
ldfe asin_coeff_P6 = [ASIN_Addr2],16
fma.s1 asin_t2 = asin_t,asin_t,f0
nop.i 999;;
}
// p8 = x is a NaN.
{ .mmf
ldfe asin_coeff_P18 = [ASIN_Addr1],16
ldfe asin_coeff_P17 = [ASIN_Addr2],16
fclass.m.unc p8,p0 = f8, 0xc3 //@qnan |@snan
}
;;
// asin_y0 = reciprocal square root estimate of t.
{ .mmf
ldfe asin_coeff_P5 = [ASIN_Addr1],16
ldfe asin_coeff_P4 = [ASIN_Addr2],16
frsqrta.s1 asin_y0,p0 = asin_t
}
;;
// p9 = (|x| > 1).
{ .mfi
ldfe asin_coeff_P16 = [ASIN_Addr1],16
fcmp.gt.s1 p9,p0 = asin_abs_x,f1
nop.i 999
}
// NaN exit: return x*1+0.
{ .mfb
ldfe asin_coeff_P15 = [ASIN_Addr2],16
(p8) fma.d f8 = f8,f1,f0
(p8) br.ret.spnt b0
}
;;
// p8 is reused: now p8 = x is zero.
{ .mmf
ldfe asin_coeff_P20 = [ASIN_Addr1],16
ldfe asin_coeff_P19 = [ASIN_Addr2],16
fclass.m.unc p8,p0 = f8, 0x07 //@zero
}
;;
// The error tag is set here, ahead of the (p9) branch further down.
{ .mfi
ldfe asin_coeff_P9 = [ASIN_Addr1],16
fma.s1 asin_t4 = asin_t2,asin_t2,f0
(p9) mov GR_Parameter_Tag = 61
}
// 3/2 = 1/2*1 + 1
{ .mfi
ldfe asin_coeff_P8 = [ASIN_Addr2],16
fma.s1 asin_3by2 = asin_1by2,f1,f1
nop.i 999;;
}
{ .mfi
ldfe asin_coeff_P2 = [ASIN_Addr2],16
fma.s1 asin_tx4 = asin_tx2,asin_tx2,f0
nop.i 999
}
// Zero exit: f8 has not been modified, so x itself is returned.
{ .mfb
ldfe asin_coeff_P3 = [ASIN_Addr1],16
fma.s1 asin_t3 = asin_t,asin_t2,f0
(p8) br.ret.spnt b0
}
;;
// H0 = y0/2 and S0 = y0*t start the refinement of the frsqrta estimate.
{ .mfi
ldfe asin_coeff_P13 = [ASIN_Addr2],16
fma.s1 asin_H0 = asin_y0,asin_1by2,f0
nop.i 999
}
{ .mfb
ldfe asin_coeff_P14 = [ASIN_Addr1],16
fma.s1 asin_S0 = asin_y0,asin_t,f0
(p9) br.cond.spnt __libm_error_region
}
;;
// p6 = (|x| == 1); the branch that uses it follows the last table load.
{ .mfi
ldfe asin_coeff_P11 = [ASIN_Addr2],16
fcmp.eq.s1 p6,p0 = asin_abs_x,f1
nop.i 999
}
{ .mfi
ldfe asin_coeff_P12 = [ASIN_Addr1],16
fma.s1 asin_tx3 = asin_tx,asin_tx2,f0
nop.i 999;;
}
{ .mfi
ldfe asin_coeff_P10 = [ASIN_Addr2],16
fma.s1 asin_1poly_p6 = asin_tx,asin_coeff_P7,asin_coeff_P6
nop.i 999
}
{ .mfi
ldfe asin_coeff_P1 = [ASIN_Addr1],16
fma.s1 asin_poly_p6 = asin_t,asin_coeff_P7,asin_coeff_P6
nop.i 999;;
}
// 5/2 = 3/2*1 + 1
{ .mfi
ldfe asin_const_sqrt2by2 = [ASIN_Addr2],16
fma.s1 asin_5by2 = asin_3by2,f1,f1
nop.i 999
}
// 11/4 = 3/2*3/2 + 1/2
{ .mfi
ldfe asin_coeff_P21 = [ASIN_Addr1],16
fma.s1 asin_11by4 = asin_3by2,asin_3by2,asin_1by2
nop.i 999;;
}
{ .mfi
ldfe asin_const_piby2 = [ASIN_Addr1],16
fma.s1 asin_poly_p17 = asin_t,asin_coeff_P18,asin_coeff_P17
nop.i 999
}
// 3/4 = 3/2*1/2
{ .mfb
nop.m 999
fma.s1 asin_3by4 = asin_3by2,asin_1by2,f0
(p6) br.cond.spnt L(ASIN_ABS_1) // Branch to short exit if |x|=1
}
;;
// From here on r36/r37 are used as ASIN_lnorm_sig/ASIN_snorm_exp; the
// branch to __libm_error_region (which uses the same registers under
// other names) has already been passed.
{ .mfi
addl ASIN_lnorm_sig = -0x1,r0 // Form significand 0xffffffffffffffff
fma.s1 asin_poly_p15 = asin_t,asin_coeff_P16,asin_coeff_P15
nop.i 999
}
// d = 1/2 - S0*H0
{ .mfi
addl ASIN_snorm_exp = 0x0c001,r0 // Form small exponent
fnma.s1 asin_d = asin_S0,asin_H0,asin_1by2
nop.i 999;;
}
// Form the exponent and significand of a small number
{ .mfi
setf.sig asin_eps_sig = ASIN_lnorm_sig
fma.s1 asin_poly_p19 = asin_t,asin_coeff_P20,asin_coeff_P19
nop.i 999
}
{ .mfi
setf.exp asin_eps_exp = ASIN_snorm_exp
fma.s1 asin_poly_p4 = asin_t,asin_coeff_P5,asin_coeff_P4
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p17 = asin_tx,asin_coeff_P18,asin_coeff_P17
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p8 = asin_t,asin_coeff_P9,asin_coeff_P8
nop.i 999;;
}
// 35/8 = 5/2*11/4 - 5/2
{ .mfi
nop.m 999
fms.s1 asin_35by8 = asin_5by2,asin_11by4,asin_5by2
nop.i 999
}
// 63/8 = 5/2*11/4 + 1
{ .mfi
nop.m 999
fma.s1 asin_63by8 = asin_5by2,asin_11by4,f1
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p13 = asin_t,asin_coeff_P14,asin_coeff_P13
nop.i 999
}
// 18/4 = 3/2*5/2 + 3/4
{ .mfi
nop.m 999
fma.s1 asin_18by4 = asin_3by2,asin_5by2,asin_3by4
nop.i 999;;
}
// l1 = 5/2*d + 3/2 and d2 = d*d
{ .mfi
nop.m 999
fma.s1 asin_l1 = asin_5by2,asin_d,asin_3by2
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_d2 = asin_d,asin_d,f0
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p15 = asin_t2,asin_poly_p17,asin_poly_p15
nop.i 999
}
// T0 = d*S0
{ .mfi
nop.m 999
fma.s1 asin_T0 = asin_d,asin_S0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p19 = asin_t2,asin_coeff_P21,asin_poly_p19
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p4 = asin_t2,asin_poly_p6,asin_poly_p4
nop.i 999;;
}
// d1 = 35/8*d
{ .mfi
nop.m 999
fma.s1 asin_d1 = asin_35by8,asin_d,f0
nop.i 999
}
// 231/16 = 3/2*35/8 + 63/8
{ .mfi
nop.m 999
fma.s1 asin_231by16 = asin_3by2,asin_35by8,asin_63by8
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p2 = asin_t,asin_coeff_P3,asin_coeff_P2
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p8 = asin_t2,asin_coeff_P10,asin_poly_p8
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p11 = asin_t,asin_coeff_P12,asin_coeff_P11
nop.i 999
}
// e0 = d2*l1 + d
{ .mfi
nop.m 999
fma.s1 asin_e0 = asin_d2,asin_l1,asin_d
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p15 = asin_tx,asin_coeff_P16,asin_coeff_P15
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p0 = asin_t,asin_coeff_P1,f1
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p19 = asin_tx,asin_coeff_P20,asin_coeff_P19
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p4 = asin_tx,asin_coeff_P5,asin_coeff_P4
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p8 = asin_tx,asin_coeff_P9,asin_coeff_P8
nop.i 999
}
// l2 = 231/16*d + 63/8
{ .mfi
nop.m 999
fma.s1 asin_l2 = asin_231by16,asin_d,asin_63by8
nop.i 999;;
}
// d3 = d2*d and T3 = d2*T0
{ .mfi
nop.m 999
fma.s1 asin_d3 = asin_d2,asin_d,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_T3 = asin_d2,asin_T0,f0
nop.i 999;;
}
// 429/16 = 18/4*11/4 + 231/16
{ .mfi
nop.m 999
fma.s1 asin_429by16 = asin_18by4,asin_11by4,asin_231by16
nop.i 999
}
// S1 = e0*S0 + S0
{ .mfi
nop.m 999
fma.s1 asin_S1 = asin_e0,asin_S0,asin_S0
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p4 = asin_t4,asin_poly_p8,asin_poly_p4
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p15 = asin_t4,asin_poly_p19,asin_poly_p15
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p0 = asin_t2,asin_poly_p2,asin_poly_p0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p11 = asin_t2,asin_poly_p13,asin_poly_p11
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_t8 = asin_t4,asin_t4,f0
nop.i 999
}
// e1 = d2*l2 + d1
{ .mfi
nop.m 999
fma.s1 asin_e1 = asin_d2,asin_l2,asin_d1
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p4 = asin_tx2,asin_1poly_p6,asin_1poly_p4
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p15 = asin_tx2,asin_1poly_p17,asin_1poly_p15
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p8 = asin_tx2,asin_coeff_P10,asin_1poly_p8
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p19 = asin_tx2,asin_coeff_P21,asin_1poly_p19
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p2 = asin_tx,asin_coeff_P3,asin_coeff_P2
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p13 = asin_tx,asin_coeff_P14,asin_coeff_P13
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p0 = asin_tx,asin_coeff_P1,f1
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p11 = asin_tx,asin_coeff_P12,asin_coeff_P11
nop.i 999;;
}
// l3 = 429/16*d
{ .mfi
nop.m 999
fma.s1 asin_l3 = asin_429by16,asin_d,f0
nop.i 999
}
// z = e1*T3 + S1; one more term (l3*T6) is added further down.
// NOTE(review): presumably z approximates sqrt(1-x*x) -- confirm.
{ .mfi
nop.m 999
fma.s1 asin_z = asin_e1,asin_T3,asin_S1
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p11 = asin_t4,asin_poly_p15,asin_poly_p11
nop.i 999
}
// T6 = T3*d3
{ .mfi
nop.m 999
fma.s1 asin_T6 = asin_T3,asin_d3,f0
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_t11 = asin_t8,asin_t3,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_poly_p0 = asin_t4,asin_poly_p4,asin_poly_p0
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p4 = asin_tx4,asin_1poly_p8,asin_1poly_p4
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p15 = asin_tx4,asin_1poly_p19,asin_1poly_p15
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p0 = asin_tx2,asin_1poly_p2,asin_1poly_p0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p11 = asin_tx2,asin_1poly_p13,asin_1poly_p11
nop.i 999;;
}
// p7 = (|x| <= sqrt(2)/2), p8 = complement. This overwrites the earlier
// meanings of p8.
{ .mfi
nop.m 999
// fcmp.le.s1 asin_pred_LEsqrt2by2,asin_pred_GTsqrt2by2 = asin_abs_x,asin_const_sqrt2by2
fcmp.le.s1 p7,p8 = asin_abs_x,asin_const_sqrt2by2
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_tx8 = asin_tx4,asin_tx4,f0
nop.i 999;;
}
// Form a small number to force inexact flag for small args
// asin_eps and asin_eps_sig are the same register (f120).
{ .mfi
nop.m 999
fmerge.se asin_eps = asin_eps_exp,asin_eps_sig
nop.i 999
}
// z = l3*T6 + z
{ .mfi
nop.m 999
fma.s1 asin_z = asin_l3,asin_T6,asin_z
nop.i 999;;
}
// series_t = t11*poly_p11 + poly_p0 (the full polynomial in t)
{ .mfi
nop.m 999
fma.s1 asin_series_t = asin_t11,asin_poly_p11,asin_poly_p0
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p0 = asin_tx4,asin_1poly_p4,asin_1poly_p0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asin_1poly_p11 = asin_tx4,asin_1poly_p15,asin_1poly_p11
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asin_tx11 = asin_tx8,asin_tx3,f0
nop.i 999;;
}
// answer2 = pi/2 - z*series_t
{ .mfi
nop.m 999
//(asin_pred_GTsqrt2by2) fnma.s1 answer2 = asin_z,asin_series_t,asin_const_piby2
(p8) fnma.s1 answer2 = asin_z,asin_series_t,asin_const_piby2
nop.i 999;;
}
// series_tx = tx11*1poly_p11 + 1poly_p0 (the full polynomial in tx)
{ .mfi
nop.m 999
fma.s1 asin_series_tx = asin_tx11,asin_1poly_p11,asin_1poly_p0
nop.i 999;;
}
// Result for |x| > sqrt(2)/2: sgn_x * answer2
{ .mfi
nop.m 999
//(asin_pred_GTsqrt2by2) fma.d f8 = asin_sgn_x,answer2,f0
(p8) fma.d f8 = asin_sgn_x,answer2,f0
nop.i 999;;
}
// asin_eps is added only to force inexact and possibly underflow flag
// in case asin_series_tx is zero
//
// The value written to asin_eps is not read afterwards in this procedure.
{ .mfi
nop.m 999
(p7) fma.d asin_eps = f8,asin_series_tx,asin_eps
nop.i 999
}
// Result for |x| <= sqrt(2)/2: x * series_tx
{ .mfb
nop.m 999
//(asin_pred_LEsqrt2by2) fma.d f8 = f8,asin_series_tx,f0
(p7) fma.d f8 = f8,asin_series_tx,f0
br.ret.sptk b0
}
;;
L(ASIN_ABS_1):
// Here for short exit if |x|=1
// Returns sgn_x * pi/2.
{ .mfb
nop.m 999
fma.d f8 = asin_sgn_x,asin_const_piby2,f0
br.ret.sptk b0
}
;;
.endp asin
ASM_SIZE_DIRECTIVE(asin)
// __libm_error_region
// Error path entered from asin when |x| > 1 (GR_Parameter_Tag has already
// been set to 61 there). Builds a 64-byte frame, stores the argument and a
// default result, calls __libm_error_support, then returns the value found
// in the result slot.
// Frame layout relative to sp after the frame is created:
//   sp+16 : parameter 1 = f8, stored as a double
//   sp+32 : parameter 2 = f1, stored as a single (stfs)
//   sp+48 : parameter 3 / result slot = f9, stored as a double
// GR_SAVE_B0/GR_SAVE_PFS name the same registers (r36/r37) as
// ASIN_lnorm_sig/ASIN_snorm_exp; asin writes those only after its branch
// to this region.
.proc __libm_error_region
__libm_error_region:
.prologue
// This bundle has no stop after it, so GR_Parameter_Y is formed from the
// sp value that precedes the "add sp=-64,sp" in the next bundle.
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 999
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// The post-increment of 16 leaves GR_Parameter_Y pointing at new sp+48.
{ .mmi
stfs [GR_Parameter_Y] = f1,16 // Store Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
// f9 = frcpa of 0/0; it becomes the default result stored below.
// NOTE(review): presumably this yields a NaN and raises invalid -- confirm.
frcpa.s0 f9,p0 = f0,f0
;;
{ .mib
stfd [GR_Parameter_X] = f8 // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
// The post-decrement of 16 moves GR_Parameter_Y back to sp+32.
// r32 is loaded with the result-slot address (sp+48) and is read again
// after the call returns.
{ .mib
stfd [GR_Parameter_Y] = f9,-16 // Store Parameter 3 on stack
adds r32 = 48,sp
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
ldfd f8 = [r32] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)

674
sysdeps/ia64/fpu/e_asinf.S Normal file
View File

@ -0,0 +1,674 @@
.file "asinf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/02/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// History
//==============================================================
// 2/02/00 Initial revision
// 6/28/00 Improved speed
// 6/31/00 Changed register allocation because of some duplicate macros
// moved nan exit bundle up to gain a cycle.
// 8/08/00 Improved speed by avoiding SIR flush.
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 8/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
// 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal.
// Description
//=========================================
// The asinf function computes the arc sine of x in the range [-pi/2,+pi/2].
// A domain error occurs for arguments not in the range [-1,+1].
// asinf(+-0) returns +-0
// asinf(x) returns a Nan and raises the invalid exception for |x| >1
// The acosf function returns the arc cosine in the range [0, +pi] radians.
// A domain error occurs for arguments not in the range [-1,+1].
// acosf(1) returns +0
// acosf(x) returns a Nan and raises the invalid exception for |x| >1
// |x| <= sqrt(2)/2. get Ax and Bx
// poly_p1 = x p1
// poly_p3 = x2 p4 + p3
// poly_p1 = x2 (poly_p1) + x = x2(x p1) + x
// poly_p2 = x2( poly_p3) + p2 = x2(x2 p4 + p3) + p2
// poly_Ax = x5(x2( poly_p3) + p2) + x2(x p1) + x
// = x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x
// poly_p7 = x2 p8 + p7
// poly_p5 = x2 p6 + p5
// poly_p7 = x4 p9 + (poly_p7)
// poly_p7 = x4 p9 + (x2 p8 + p7)
// poly_Bx = x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5
// answer1 = x11(x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5) + x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x
// = x19 p9 + x17 p8 + x15 p7 + x13 p6 + x11 p5 + x9 p4 + x7 p3 + x5 p2 + x3 p1 + x
// |x| > sqrt(2)/2
// Get z = sqrt(1-x2)
// Get polynomial in t = 1-x2
// t2 = t t
// t4 = t2 t2
// poly_p4 = t p5 + p4
// poly_p1 = t p1 + 1
// poly_p6 = t p7 + p6
// poly_p2 = t p3 + p2
// poly_p8 = t p9 + p8
// poly_p4 = t2 poly_p6 + poly_p4
// = t2 (t p7 + p6) + (t p5 + p4)
// poly_p2 = t2 poly_p2 + poly_p1
// = t2 (t p3 + p2) + (t p1 + 1)
// poly_p4 = t4 poly_p8 + poly_p4
// = t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4))
// P(t) = poly_p2 + t4 poly_p4
// = t2 (t p3 + p2) + (t p1 + 1) + t4 (t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4)))
// = t3 p3 + t2 p2 + t p1 + 1 + t9 p9 + t8 p8 + t7 p7 + t6 p6 + t5 p5 + t4 p4
// answer2 = - sign(x) z P(t) + (sign(x) pi/2)
//
#include "libm_support.h"
// Assembly macros
//=========================================
// predicate registers
// (Both aliases below are left commented out; the code names p7 and p8
// directly -- see the 8/17/00 history note about an assembler bug.)
//asinf_pred_LEsqrt2by2 = p7
//asinf_pred_GTsqrt2by2 = p8
// integer registers
// r33-r34 hold the coefficient table addresses, r35-r37 the single-precision
// bit patterns of 1/2, 3/2 and 5/2, r38-r40 the state saved around the call
// to __libm_error_support, and r41-r44 the values handed to that call.
ASINF_Addr1 = r33
ASINF_Addr2 = r34
ASINF_GR_1by2 = r35
ASINF_GR_3by2 = r36
ASINF_GR_5by2 = r37
GR_SAVE_B0 = r38
GR_SAVE_PFS = r39
GR_SAVE_GP = r40
GR_Parameter_X = r41
GR_Parameter_Y = r42
GR_Parameter_RESULT = r43
GR_Parameter_TAG = r44
// floating point registers
asinf_y = f32
asinf_abs_x = f33
asinf_x2 = f34
asinf_sgn_x = f35
asinf_1by2 = f36
asinf_3by2 = f37
asinf_5by2 = f38
// Polynomial coefficients, loaded from the two data tables below.
asinf_coeff_P3 = f39
asinf_coeff_P8 = f40
asinf_coeff_P1 = f41
asinf_coeff_P4 = f42
asinf_coeff_P5 = f43
asinf_coeff_P2 = f44
asinf_coeff_P7 = f45
asinf_coeff_P6 = f46
asinf_coeff_P9 = f47
// NOTE(review): asinf_x2 is assigned a second time here (it was f34 above).
// Presumably this later value (f48) is the one in effect for the code that
// follows, leaving f34 unused -- confirm against the assembler's handling of
// re-assigned symbols before removing either line.
asinf_x2 = f48
asinf_x3 = f49
asinf_x4 = f50
asinf_x8 = f51
asinf_x5 = f52
asinf_const_piby2 = f53
asinf_const_sqrt2by2 = f54
asinf_x11 = f55
// Intermediates of the |x| <= sqrt(2)/2 polynomial (in x).
asinf_poly_p1 = f56
asinf_poly_p3 = f57
asinf_sinf1 = f58
asinf_poly_p2 = f59
asinf_poly_Ax = f60
asinf_poly_p7 = f61
asinf_poly_p5 = f62
asinf_sgnx_t4 = f63
asinf_poly_Bx = f64
// t = 1 - x*x and the intermediates used to form z = sqrt(t).
asinf_t = f65
asinf_yby2 = f66
asinf_B = f67
asinf_B2 = f68
asinf_Az = f69
asinf_dz = f70
asinf_Sz = f71
asinf_d2z = f72
asinf_Fz = f73
asinf_z = f74
asinf_sgnx_z = f75
// Intermediates of the |x| > sqrt(2)/2 polynomial (in t).
asinf_t2 = f76
asinf_2poly_p4 = f77
asinf_2poly_p6 = f78
asinf_2poly_p1 = f79
asinf_2poly_p2 = f80
asinf_2poly_p8 = f81
asinf_t4 = f82
asinf_Pt = f83
asinf_sgnx_2poly_p2 = f84
asinf_sgn_x_piby2 = f85
asinf_poly_p7a = f86
asinf_2poly_p4a = f87
asinf_2poly_p4b = f88
asinf_2poly_p2a = f89
asinf_poly_p1a = f90
// Data tables
//==============================================================
// Two tables of 64-bit (double-precision) constants.  asinf reads each table
// front to back, two entries at a time with ldfpd, so the entries are stored
// in load order rather than in coefficient order.  The last entry of the
// second table (pi/2) is read on its own with ldfd.
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
// Table 1, read through ASINF_Addr1: (P1,P4) (P7,P6) (P9,sqrt(2)/2)
asinf_coeff_1_table:
ASM_TYPE_DIRECTIVE(asinf_coeff_1_table,@object)
data8 0x3FC5555607DCF816 // P1
data8 0x3F9CF81AD9BAB2C6 // P4
data8 0x3FC59E0975074DF3 // P7
data8 0xBFA6F4CC2780AA1D // P6
data8 0x3FC2DD45292E93CB // P9
data8 0x3fe6a09e667f3bcd // sqrt(2)/2
ASM_SIZE_DIRECTIVE(asinf_coeff_1_table)
// Table 2, read through ASINF_Addr2: (P3,P8) (P5,P2) pi/2
asinf_coeff_2_table:
ASM_TYPE_DIRECTIVE(asinf_coeff_2_table,@object)
data8 0x3FA6F108E31EFBA6 // P3
data8 0xBFCA31BF175D82A0 // P8
data8 0x3FA30C0337F6418B // P5
data8 0x3FB332C9266CB1F9 // P2
data8 0x3ff921fb54442d18 // pi_by_2
ASM_SIZE_DIRECTIVE(asinf_coeff_2_table)
// asinf(x): single-precision arc sine.  x arrives in f8 and the result is
// returned in f8.
// Exits, in the order they are tested:
//   x is NaN          -> returns x*1+0                            (p8)
//   x is +-0          -> returns f8 unchanged                     (p10)
//   |x| == 1          -> L(ASINF_ABS_ONE): sgn(x)*pi/2            (p6)
//   |x| > 1           -> __libm_error_region with tag 62          (p9)
//   |x| <= sqrt(2)/2  -> odd polynomial in x                      (p7)
//   otherwise         -> sgn(x)*pi/2 - z*sgn(x)*P(t), t = 1-x*x   (p8, reused)
// The terms of both polynomial forms are started before the range of x is
// known; only the last few operations are predicated on p7/p8.
.align 32
.global asinf
.section .text
.proc asinf
.align 32
asinf:
// Load the addresses of the two tables.
// Then, load the coefficients and other constants.
// t = 1 - x*x; the 1/2 constant is built as a single-precision bit pattern.
{ .mfi
alloc r32 = ar.pfs,1,8,4,0
fnma.s1 asinf_t = f8,f8,f1
dep.z ASINF_GR_1by2 = 0x3f,24,8 // 0x3f000000
}
// x2 = x*x; fetch the linkage-table slots that hold the table addresses.
{ .mfi
addl ASINF_Addr1 = @ltoff(asinf_coeff_1_table),gp
fma.s1 asinf_x2 = f8,f8,f0
addl ASINF_Addr2 = @ltoff(asinf_coeff_2_table),gp ;;
}
// abs_x takes its sign from f1 (positive) and everything else from x.
{ .mfi
ld8 ASINF_Addr1 = [ASINF_Addr1]
fmerge.s asinf_abs_x = f1,f8
dep ASINF_GR_3by2 = -1,r0,22,8 // 0x3fc00000
}
{ .mlx
nop.m 999
movl ASINF_GR_5by2 = 0x40200000;;
}
// sgn_x takes its sign from x and everything else from f1, i.e. +-1.0.
{ .mfi
setf.s asinf_1by2 = ASINF_GR_1by2
fmerge.s asinf_sgn_x = f8,f1
nop.i 999
}
{ .mfi
ld8 ASINF_Addr2 = [ASINF_Addr2]
nop.f 0
nop.i 999;;
}
// NOTE(review): p11/p12 (x < 0) do not appear to be consumed later in this
// procedure -- confirm before relying on them.
{ .mfi
setf.s asinf_5by2 = ASINF_GR_5by2
fcmp.lt.s1 p11,p12 = f8,f0
nop.i 999;;
}
// p8 = x is a NaN.
{ .mmf
ldfpd asinf_coeff_P1,asinf_coeff_P4 = [ASINF_Addr1],16
setf.s asinf_3by2 = ASINF_GR_3by2
fclass.m.unc p8,p0 = f8, 0xc3 ;; //@qnan | @snan
}
{ .mfi
ldfpd asinf_coeff_P7,asinf_coeff_P6 = [ASINF_Addr1],16
fma.s1 asinf_t2 = asinf_t,asinf_t,f0
nop.i 999
}
{ .mfi
ldfpd asinf_coeff_P3,asinf_coeff_P8 = [ASINF_Addr2],16
fma.s1 asinf_x4 = asinf_x2,asinf_x2,f0
nop.i 999;;
}
// p10 = x is +-0.
{ .mfi
ldfpd asinf_coeff_P9,asinf_const_sqrt2by2 = [ASINF_Addr1]
fclass.m.unc p10,p0 = f8, 0x07 //@zero
nop.i 999
}
{ .mfi
ldfpd asinf_coeff_P5,asinf_coeff_P2 = [ASINF_Addr2],16
fma.s1 asinf_x3 = f8,asinf_x2,f0
nop.i 999;;
}
// B = reciprocal square root approximation of t, the seed for z = sqrt(t).
{ .mfi
ldfd asinf_const_piby2 = [ASINF_Addr2]
frsqrta.s1 asinf_B,p0 = asinf_t
nop.i 999
}
{ .mfb
nop.m 999
(p8) fma.s f8 = f8,f1,f0
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
// p6 = (|x| == 1).  For x = +-0 the input is returned as it stands.
{ .mfb
nop.m 999
fcmp.eq.s1 p6,p0 = asinf_abs_x,f1
(p10) br.ret.spnt b0 ;; // Exit if x=0
}
// p9 = (|x| > 1), the domain-error case.
{ .mfi
nop.m 999
fcmp.gt.s1 p9,p0 = asinf_abs_x,f1
nop.i 999;;
}
{ .mfi
nop.m 999
fma.s1 asinf_x8 = asinf_x4,asinf_x4,f0
nop.i 999
}
{ .mfb
nop.m 999
fma.s1 asinf_t4 = asinf_t2,asinf_t2,f0
(p6) br.cond.spnt L(ASINF_ABS_ONE) ;; // Branch if |x|=1
}
{ .mfi
nop.m 999
fma.s1 asinf_x5 = asinf_x2,asinf_x3,f0
nop.i 999
}
// yby2 = t/2.  The error tag (62) is set in the same bundle as the branch
// to the error handler.
{ .mfb
(p9) mov GR_Parameter_TAG = 62
fma.s1 asinf_yby2 = asinf_t,asinf_1by2,f0
(p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1
}
// Az = t*B is the first approximation of sqrt(t); B2 = B*B.
{ .mfi
nop.m 999
fma.s1 asinf_Az = asinf_t,asinf_B,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 asinf_B2 = asinf_B,asinf_B,f0
nop.i 999;;
}
// From here the terms of the x-polynomial (asinf_poly_*) and of the
// t-polynomial (asinf_2poly_*) are interleaved.
// poly_p1 = x*P1
{ .mfi
nop.m 999
fma.s1 asinf_poly_p1 = f8,asinf_coeff_P1,f0
nop.i 999
}
// 2poly_p1 = t*P1 + 1
{ .mfi
nop.m 999
fma.s1 asinf_2poly_p1 = asinf_coeff_P1,asinf_t,f1
nop.i 999;;
}
// poly_p3 = x2*P4 + P3
{ .mfi
nop.m 999
fma.s1 asinf_poly_p3 = asinf_coeff_P4,asinf_x2,asinf_coeff_P3
nop.i 999
}
// 2poly_p6 = t*P7 + P6
{ .mfi
nop.m 999
fma.s1 asinf_2poly_p6 = asinf_coeff_P7,asinf_t,asinf_coeff_P6
nop.i 999;;
}
// poly_p7 = x2*P8 + P7
{ .mfi
nop.m 999
fma.s1 asinf_poly_p7 = asinf_x2,asinf_coeff_P8,asinf_coeff_P7
nop.i 999
}
// 2poly_p2 = t*P3 + P2
{ .mfi
nop.m 999
fma.s1 asinf_2poly_p2 = asinf_coeff_P3,asinf_t,asinf_coeff_P2
nop.i 999;;
}
// poly_p5 = x2*P6 + P5
{ .mfi
nop.m 999
fma.s1 asinf_poly_p5 = asinf_x2,asinf_coeff_P6,asinf_coeff_P5
nop.i 999
}
// 2poly_p4 = t*P5 + P4
{ .mfi
nop.m 999
fma.s1 asinf_2poly_p4 = asinf_coeff_P5,asinf_t,asinf_coeff_P4
nop.i 999;;
}
// x11 = x8*x3
{ .mfi
nop.m 999
fma.d.s1 asinf_x11 = asinf_x8,asinf_x3,f0
nop.i 999
}
// dz = 1/2 - B2*(t/2)
{ .mfi
nop.m 999
fnma.s1 asinf_dz = asinf_B2,asinf_yby2,asinf_1by2
nop.i 999;;
}
// poly_p1a = x2*(x*P1) + x
{ .mfi
nop.m 999
fma.s1 asinf_poly_p1a = asinf_x2,asinf_poly_p1,f8
nop.i 999
}
// 2poly_p8 = t*P9 + P8
{ .mfi
nop.m 999
fma.s1 asinf_2poly_p8 = asinf_coeff_P9,asinf_t,asinf_coeff_P8
nop.i 999;;
}
// Get the absolute value of x and determine the region in which x lies
// p7 = (|x| <= sqrt(2)/2), p8 = the complement.  p8 held the NaN test
// earlier; that path has already returned, so it is reused here.
{ .mfi
nop.m 999
fcmp.le.s1 p7,p8 = asinf_abs_x,asinf_const_sqrt2by2
nop.i 999
}
// poly_p2 = x2*poly_p3 + P2
{ .mfi
nop.m 999
fma.s1 asinf_poly_p2 = asinf_x2,asinf_poly_p3,asinf_coeff_P2
nop.i 999;;
}
// poly_p7a = x4*P9 + poly_p7
{ .mfi
nop.m 999
fma.s1 asinf_poly_p7a = asinf_x4,asinf_coeff_P9,asinf_poly_p7
nop.i 999
}
// 2poly_p2a = t2*2poly_p2 + 2poly_p1
{ .mfi
nop.m 999
fma.s1 asinf_2poly_p2a = asinf_2poly_p2,asinf_t2,asinf_2poly_p1
nop.i 999;;
}
// The remaining work is predicated on the region: p8 finishes z and P(t),
// p7 finishes the polynomial in x.
// sgnx_t4 = sgn(x)*t4
{ .mfi
nop.m 999
(p8) fma.s1 asinf_sgnx_t4 = asinf_sgn_x,asinf_t4,f0
nop.i 999
}
// 2poly_p4a = t2*2poly_p6 + 2poly_p4
{ .mfi
nop.m 999
(p8) fma.s1 asinf_2poly_p4a = asinf_2poly_p6,asinf_t2,asinf_2poly_p4
nop.i 999;;
}
// Sz = (5/2)*dz + 3/2
{ .mfi
nop.m 999
(p8) fma.s1 asinf_Sz = asinf_5by2,asinf_dz,asinf_3by2
nop.i 999
}
// d2z = dz*dz
{ .mfi
nop.m 999
(p8) fma.s1 asinf_d2z = asinf_dz,asinf_dz,f0
nop.i 999;;
}
// sgn_x_piby2 = sgn(x)*pi/2
{ .mfi
nop.m 999
(p8) fma.s1 asinf_sgn_x_piby2 = asinf_sgn_x,asinf_const_piby2,f0
nop.i 999
}
// poly_Ax = x5*poly_p2 + poly_p1a
{ .mfi
nop.m 999
(p7) fma.d.s1 asinf_poly_Ax = asinf_x5,asinf_poly_p2,asinf_poly_p1a
nop.i 999;;
}
// poly_Bx = x4*poly_p7a + poly_p5
{ .mfi
nop.m 999
(p7) fma.d.s1 asinf_poly_Bx = asinf_x4,asinf_poly_p7a,asinf_poly_p5
nop.i 999
}
// sgnx_2poly_p2 = sgn(x)*2poly_p2a
{ .mfi
nop.m 999
(p8) fma.s1 asinf_sgnx_2poly_p2 = asinf_sgn_x,asinf_2poly_p2a,f0
nop.i 999;;
}
// This compare uses status field s0 (the computation above uses s1); its
// predicate result p6 is not used afterwards.
{ .mfi
nop.m 999
fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal
nop.i 999
}
// 2poly_p4b = t4*2poly_p8 + 2poly_p4a
{ .mfi
nop.m 999
(p8) fma.s1 asinf_2poly_p4b = asinf_2poly_p8,asinf_t4,asinf_2poly_p4a
nop.i 999;;
}
// Fz = d2z*Sz + dz
{ .mfi
nop.m 999
(p8) fma.s1 asinf_Fz = asinf_d2z,asinf_Sz,asinf_dz
nop.i 999;;
}
// Pt = sgn(x)*t4*2poly_p4b + sgn(x)*2poly_p2a, i.e. sgn(x)*P(t)
{ .mfi
nop.m 999
(p8) fma.d.s1 asinf_Pt = asinf_2poly_p4b,asinf_sgnx_t4,asinf_sgnx_2poly_p2
nop.i 999;;
}
// z = Az*Fz + Az, the refined sqrt(t)
{ .mfi
nop.m 999
(p8) fma.d.s1 asinf_z = asinf_Az,asinf_Fz,asinf_Az
nop.i 999;;
}
// Exactly one of the next two results is written to f8; both are rounded
// to single precision by the .s completer.
.pred.rel "mutex",p8,p7 //asinf_pred_GTsqrt2by2,asinf_pred_LEsqrt2by2
// |x| > sqrt(2)/2: f8 = sgn(x)*pi/2 - z*Pt
{ .mfi
nop.m 999
(p8) fnma.s f8 = asinf_z,asinf_Pt,asinf_sgn_x_piby2
nop.i 999
}
// |x| <= sqrt(2)/2: f8 = x11*poly_Bx + poly_Ax
{ .mfb
nop.m 999
(p7) fma.s f8 = asinf_x11,asinf_poly_Bx,asinf_poly_Ax
br.ret.sptk b0 ;;
}
L(ASINF_ABS_ONE):
// Here for short exit if |x|=1
// f8 = sgn(x)*pi/2, rounded to single precision.
{ .mfb
nop.m 999
fma.s f8 = asinf_sgn_x,asinf_const_piby2,f0
br.ret.sptk b0
}
;;
.endp asinf
ASM_SIZE_DIRECTIVE(asinf)
// Stack operations when calling error support.
// (1) (2)
// sp -> + psp -> +
// | |
// | | <- GR_Y
// | |
// | <-GR_Y Y2->|
// | |
// | | <- GR_X
// | |
// sp-64 -> + sp -> +
// save ar.pfs save b0
// save gp
// Stack operations when calling error support.
// (3) (call) (4)
// psp -> + sp -> +
// | |
// R3 ->| <- GR_RESULT | -> f8
// | |
// Y2 ->| <- GR_Y |
// | |
// X1 ->| |
// | |
// sp -> + +
// restore gp
// restore ar.pfs
// __libm_error_region: reached from asinf when |x| > 1, with
// GR_Parameter_TAG already set by the caller.  It opens a 64-byte stack
// frame, stores the argument, a second parameter (f1) and a 0/0 NaN result
// on the stack as single-precision values, calls __libm_error_support, and
// returns in f8 whatever is then found in the result slot.
.proc __libm_error_region
__libm_error_region:
.prologue
// GR_Parameter_Y = (entry sp) - 32; ar.pfs is saved.
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 999
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// The post-increment leaves GR_Parameter_Y at (new sp) + 48, the slot that
// is used for the result below.
{ .mmi
stfs [GR_Parameter_Y] = f1,16 // Store Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
// f9 = frcpa(0/0) under status field s0: the value stored as the
// provisional result.
{ .mfi
nop.m 0
frcpa.s0 f9,p0 = f0,f0
nop.i 0
};;
{ .mib
stfs [GR_Parameter_X] = f8 // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
// After the store, GR_Parameter_Y is moved back by 16 to (new sp) + 32,
// the slot that received f1 above.
{ .mib
stfs [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Recompute the result address from sp after the call (see the 8/15/00
// history note: GR_Parameter_RESULT was previously overwritten).
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// The error handler itself is defined elsewhere; only its symbol type and
// global binding are declared here.
.type __libm_error_support#,@function
.global __libm_error_support#

777
sysdeps/ia64/fpu/e_asinl.S Normal file
View File

@ -0,0 +1,777 @@
.file "asinl.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00 Initial version
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//
// API
//==============================================================
// long double = asinl(long double)
// input floating point f8
// output floating point f8
//
// Registers used
//==============================================================
//
// predicate registers used:
// p6 -> p12
//
// floating-point registers used:
// f8 has input, then output
// f8 -> f13, f32 -> f88
//
// general registers used:
// r32 -> r47
//
// Overview of operation
//==============================================================
// There are three paths
// 1. |x| < 2^-40 ASIN_TINY
// 2. 2^-40 <= |x| < 1/4 ASIN_POLY
// 3. 1/4 <= |x| < 1 ASIN_ATAN
#include "libm_support.h"
// Assembly macros
//==============================================================
// Floating-point values stored on the stack by __libm_error_region:
// FR_X is the argument, FR_Y the second parameter, FR_RESULT the result.
FR_RESULT = f10
FR_X = f8
FR_Y = f1
// Partial sums of the case-2 polynomial.
asin_P79 = f32
asin_P59 = f33
asin_P39 = f34
asin_P19 = f35
asin_P810 = f36
asin_P610 = f37
asin_P410 = f38
asin_P210 = f39
// Polynomial coefficients A1..A10, loaded from asin_coefficients.
asin_A1 = f41
asin_A2 = f42
asin_A3 = f43
asin_A4 = f44
asin_A5 = f45
asin_A6 = f46
asin_A7 = f47
asin_A8 = f48
asin_A9 = f49
asin_A10 = f50
asin_X2 = f51
asin_X4 = f52
// Case 3: B + Bb represents 1 - x*x, and W + Ww its square root.
asin_B = f53
asin_Bb = f54
asin_C = f55
asin_Cc = f56
asin_D = f57
asin_W = f58
asin_Ww = f59
asin_y0 = f60
asin_y1 = f61
asin_y2 = f62
asin_H = f63
asin_Hh = f64
asin_t1 = f65
asin_t2 = f66
asin_t3 = f67
asin_t4 = f68
asin_t5 = f69
asin_Pseries = f70
asin_NORM_f8 = f71
asin_ABS_NORM_f8 = f72
asin_2m100 = f73
asin_P1P2 = f74
asin_HALF = f75
asin_1mD = f76
asin_1mB = f77
asin_1mBmC = f78
asin_S = f79
asin_BmWW = f80
asin_BmWWpb = f81
asin_2W = f82
asin_1d2W = f83
asin_Dd = f84
asin_XWw = f85
asin_low = f86
asin_pi_by_2 = f87
asin_pi_by_2_lo = f88
// General registers.
asin_GR_17_ones = r33
asin_GR_16_ones = r34
asin_GR_signexp_f8 = r35
asin_GR_exp = r36
asin_GR_true_exp = r37
asin_GR_ff9b = r38
GR_SAVE_B0 = r39
GR_SAVE_SP = r40
// GR_SAVE_PFS shares r33 with asin_GR_17_ones above.
// NOTE(review): presumably safe because the exponent mask is only read
// during the initial range classification -- confirm.
GR_SAVE_PFS = r33
// r33 can be used safely.
// r40 is address of table of coefficients
// Later it is used to save sp across calls
GR_SAVE_GP = r41
asin_GR_fffe = r42
asin_GR_retval = r43
GR_Parameter_X = r44
GR_Parameter_Y = r45
GR_Parameter_RESULT = r46
GR_Parameter_TAG = r47
// 2^-40:
// A true exponent of -40 is
// : -40 + register_bias
// : -28 + ffff = ffd7
// A true exponent of -100 is
// : -100 + register_bias
// : -64 + ffff = ff9b
// Data tables
//==============================================================
// Each entry is two data8 words (16 bytes): the first word is the 64-bit
// significand, the second holds the sign/exponent field.  asinl walks the
// table with ldfe and a post-increment of 16, so the entries are stored in
// the order they are loaded: A10 down to A1, then pi/2 high and low parts.
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
asin_coefficients:
ASM_TYPE_DIRECTIVE(asin_coefficients,@object)
data8 0xBB08911F2013961E, 0x00003FF8 // A10
data8 0x981F1095A23A87D3, 0x00003FF8 // A9
data8 0xBDF09C6C4177BCC6, 0x00003FF8 // A8
data8 0xE4C3A60B049ACCEA, 0x00003FF8 // A7
data8 0x8E2789F4E8A8F1AD, 0x00003FF9 // A6
data8 0xB745D09B2B0E850B, 0x00003FF9 // A5
data8 0xF8E38E3BC4C50920, 0x00003FF9 // A4
data8 0xB6DB6DB6D89FCD81, 0x00003FFA // A3
data8 0x99999999999AF376, 0x00003FFB // A2
data8 0xAAAAAAAAAAAAAA71, 0x00003FFC // A1
data8 0xc90fdaa22168c234, 0x00003FFF // pi_by_2_hi
data8 0xc4c6628b80dc1cd1, 0x00003FBF // pi_by_2_lo
ASM_SIZE_DIRECTIVE(asin_coefficients)
// asinl(x): long double arc sine.  x arrives in f8 and the result is
// returned in f8.
// The unbiased exponent of the normalized input selects one of four cases:
//   case 1  |x| < 2^-40          returns 2^-100*x + x
//   case 2  2^-40 <= |x| < 2^-2  returns x + x*x^2*(A1 + A2*x^2 + ... + A10*x^18)
//   case 3  2^-2 <= |x| < 1      builds W + w = sqrt(1 - x*x) and falls through
//                                into __libm_callout, which calls
//                                __libm_atan2_reg
//   case 4  1 <= |x|             branches to L(ASIN_ERROR_RETURN)
// There is no branch at the end of this procedure: execution continues
// straight into __libm_callout below.
.align 32
.global asinl#
.section .text
.proc asinl#
.align 32
asinl:
// Normalize the input and build the exponent masks.
{ .mfi
alloc r32 = ar.pfs,1,11,4,0
(p0) fnorm asin_NORM_f8 = f8
(p0) mov asin_GR_17_ones = 0x1ffff
}
{ .mii
(p0) mov asin_GR_16_ones = 0xffff
(p0) mov asin_GR_ff9b = 0xff9b ;;
nop.i 999
}
// asin_2m100 = 2^-100 (biased exponent 0xff9b); r40 = linkage-table slot of
// the coefficient table.
{ .mmi
(p0) setf.exp asin_2m100 = asin_GR_ff9b
(p0) addl r40 = @ltoff(asin_coefficients), gp
nop.i 999
}
;;
// r40 = address of asin_coefficients.
{ .mmi
ld8 r40 = [r40]
nop.m 999
nop.i 999
}
;;
// Load the constants
// Every ldfe advances r40 by 16, so the loads walk the table in order.
{ .mmi
(p0) ldfe asin_A10 = [r40],16 ;;
(p0) ldfe asin_A9 = [r40],16
nop.i 999 ;;
}
{ .mmi
(p0) ldfe asin_A8 = [r40],16 ;;
(p0) ldfe asin_A7 = [r40],16
nop.i 999 ;;
}
// Fetch the sign/exponent field of the normalized input.
{ .mmi
(p0) ldfe asin_A6 = [r40],16 ;;
(p0) getf.exp asin_GR_signexp_f8 = asin_NORM_f8
nop.i 999
}
{ .mmi
(p0) ldfe asin_A5 = [r40],16 ;;
(p0) ldfe asin_A4 = [r40],16
nop.i 999 ;;
}
// |x| (sign taken from f0); mask the exponent field with the 17-bit mask.
{ .mfi
nop.m 999
(p0) fmerge.s asin_ABS_NORM_f8 = f0, asin_NORM_f8
(p0) and asin_GR_exp = asin_GR_signexp_f8, asin_GR_17_ones ;;
}
// case 1: |x| < 2^-40 ==> p6 (includes x = +-0)
// case 2: 2^-40 <= |x| < 2^-2 ==> p8
// case 3: 2^-2 <= |x| < 1 ==> p9
// case 4: 1 <= |x| ==> p11
// In case 4, we pick up the special case x = +-1 and return +-pi/2
// Remove the exponent bias (0xffff), then classify with a chain of .unc
// compares; each compare is qualified by the "not yet classified" predicate
// of the previous one.
{ .mii
(p0) ldfe asin_A3 = [r40],16
(p0) sub asin_GR_true_exp = asin_GR_exp, asin_GR_16_ones ;;
(p0) cmp.ge.unc p6, p7 = -41, asin_GR_true_exp ;;
}
{ .mii
(p0) ldfe asin_A2 = [r40],16
(p7) cmp.ge.unc p8, p9 = -3, asin_GR_true_exp ;;
(p9) cmp.ge.unc p10, p11 = -1, asin_GR_true_exp
}
// After these two loads r40 points at the pi_by_2_lo entry, which is what
// the |x| = 1 path reads.
{ .mmi
(p0) ldfe asin_A1 = [r40],16 ;;
(p0) ldfe asin_pi_by_2 = [r40],16
nop.i 999
}
// case 4: |x| >= 1
{ .mib
nop.m 999
nop.i 999
(p11) br.spnt L(ASIN_ERROR_RETURN) ;;
}
// case 1: |x| < 2^-40
// f8 = 2^-100*x + x, computed under status field s0.
{ .mfb
nop.m 999
(p6) fma.s0 f8 = asin_2m100,f8,f8
(p6) br.ret.spnt b0 ;;
}
// case 2: 2^-40 <= |x| < 2^-2 ==> p8
// Two interleaved chains in X4 = x^4: one over the even-numbered
// coefficients (P810 .. P210) and one over the odd-numbered ones
// (P79 .. P19).
{ .mfi
nop.m 999
(p8) fma.s1 asin_X2 = f8,f8, f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p8) fma.s1 asin_X4 = asin_X2,asin_X2, f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p8) fma.s1 asin_P810 = asin_X4, asin_A10, asin_A8
nop.i 999
}
{ .mfi
nop.m 999
(p8) fma.s1 asin_P79 = asin_X4, asin_A9, asin_A7
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p8) fma.s1 asin_P610 = asin_X4, asin_P810, asin_A6
nop.i 999
}
{ .mfi
nop.m 999
(p8) fma.s1 asin_P59 = asin_X4, asin_P79, asin_A5
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p8) fma.s1 asin_P410 = asin_X4, asin_P610, asin_A4
nop.i 999
}
{ .mfi
nop.m 999
(p8) fma.s1 asin_P39 = asin_X4, asin_P59, asin_A3
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p8) fma.s1 asin_P210 = asin_X4, asin_P410, asin_A2
nop.i 999
}
{ .mfi
nop.m 999
(p8) fma.s1 asin_P19 = asin_X4, asin_P39, asin_A1
nop.i 999 ;;
}
// Combine the chains: P1P2 = X2*P210 + P19, then multiply by X2 once more.
{ .mfi
nop.m 999
(p8) fma.s1 asin_P1P2 = asin_X2, asin_P210, asin_P19
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p8) fma.s1 asin_P1P2 = asin_X2, asin_P1P2, f0
nop.i 999 ;;
}
// f8 = x*P1P2 + x, computed under status field s0.
{ .mfb
nop.m 999
(p8) fma.s0 f8 = asin_NORM_f8, asin_P1P2, asin_NORM_f8
(p8) br.ret.spnt b0 ;;
}
// case 3: 2^-2 <= |x| < 1
// 1- X*X is computed as B + b
// Step 1.1: Get B and b
// atan2 will return
// f8 = Z_hi
// f10 = Z_lo
// f11 = s_lo
// f8 = |x|; asin_HALF = 2^-1 (biased exponent 0xfffe); f12 = normalized x,
// kept so that its sign can be put back on the result after the call.
{ .mfi
(p0) mov asin_GR_fffe = 0xfffe
(p0) fmerge.se f8 = asin_ABS_NORM_f8, asin_ABS_NORM_f8
nop.i 0
};;
{ .mmf
nop.m 0
(p0) setf.exp asin_HALF = asin_GR_fffe
(p0) fmerge.se f12 = asin_NORM_f8, asin_NORM_f8 ;;
}
// p6 = (|x| < 1/2), p7 = (|x| >= 1/2); each uses its own way of forming
// B and Bb.
{ .mfi
nop.m 999
(p0) fcmp.lt.unc.s1 p6,p7 = asin_ABS_NORM_f8, asin_HALF
nop.i 999 ;;
}
// |x| >= 1/2: D = 1 + |x|, C = 1 - |x|, B = C*D.
{ .mfi
nop.m 999
(p7) fma.s1 asin_D = f1,f1,asin_ABS_NORM_f8
nop.i 999
}
{ .mfi
nop.m 999
(p7) fms.s1 asin_C = f1,f1,asin_ABS_NORM_f8
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p7) fma.s1 asin_B = asin_C, asin_D, f0
nop.i 999
}
{ .mfi
nop.m 999
(p7) fms.s1 asin_1mD = f1,f1,asin_D
nop.i 999 ;;
}
// Dd = (1 - D) + |x|; Bb = (C*D - B) + C*Dd.
{ .mfi
nop.m 999
(p7) fma.s1 asin_Dd = asin_1mD,f1, asin_ABS_NORM_f8
nop.i 999
}
{ .mfi
nop.m 999
(p7) fms.s1 asin_Bb = asin_C, asin_D, asin_B
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p7) fma.s1 asin_Bb = asin_C, asin_Dd, asin_Bb
nop.i 999
}
// |x| < 1/2: C = x*x, B = 1 - C, Cc = x*x - C.
{ .mfi
nop.m 999
(p6) fma.s1 asin_C = asin_ABS_NORM_f8, asin_ABS_NORM_f8, f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p6) fms.s1 asin_B = f1, f1, asin_C
nop.i 999
}
{ .mfi
nop.m 999
(p6) fms.s1 asin_Cc = asin_ABS_NORM_f8, asin_ABS_NORM_f8, asin_C
nop.i 999 ;;
}
// Hh = B/2, used in both branches.
{ .mfi
nop.m 999
(p0) fma.s1 asin_Hh = asin_HALF, asin_B, f0
nop.i 999
}
{ .mfi
nop.m 999
(p6) fms.s1 asin_1mB = f1, f1, asin_B
nop.i 999 ;;
}
// Step 1.2:
// sqrt(B + b) is computed as W + w
// Get W
// y0 = reciprocal square root approximation of B.
{ .mfi
nop.m 999
(p0) frsqrta.s1 asin_y0,p8 = asin_B
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p6) fms.s1 asin_1mBmC = asin_1mB, f1, asin_C
nop.i 999 ;;
}
// First refinement: t1 = y0^2, t2 = 1/2 - t1*Hh, y1 = y0 + t2*y0.
{ .mfi
nop.m 999
(p0) fma.s1 asin_t1 = asin_y0, asin_y0, f0
nop.i 999 ;;
}
// |x| < 1/2: Bb = ((1 - B) - C) - Cc.
{ .mfi
nop.m 999
(p6) fms.s1 asin_Bb = asin_1mBmC, f1, asin_Cc
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p0) fnma.s1 asin_t2 = asin_t1, asin_Hh, asin_HALF
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p0) fma.s1 asin_y1 = asin_t2, asin_y0, asin_y0
nop.i 999 ;;
}
// Second refinement: t3 = y1*Hh, t4 = 1/2 - t3*y1, y2 = y1 + t4*y1.
{ .mfi
nop.m 999
(p0) fma.s1 asin_t3 = asin_y1, asin_Hh, f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p0) fnma.s1 asin_t4 = asin_t3, asin_y1, asin_HALF
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p0) fma.s1 asin_y2 = asin_t4, asin_y1, asin_y1
nop.i 999 ;;
}
// S = B*y2 and H = y2/2.
{ .mfi
nop.m 999
(p0) fma.s1 asin_S = asin_B, asin_y2, f0
nop.i 999
}
{ .mfi
nop.m 999
(p0) fma.s1 asin_H = asin_y2, asin_HALF, f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p0) fma.s1 asin_t5 = asin_Hh, asin_y2, f0
nop.i 999 ;;
}
// Dd = B - S*S, then W = S + Dd*H.
{ .mfi
nop.m 999
(p0) fnma.s1 asin_Dd = asin_S, asin_S, asin_B
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p0) fma.s1 asin_W = asin_Dd, asin_H, asin_S
nop.i 999 ;;
}
// 2W = W + W.
{ .mfi
nop.m 999
(p0) fma.s1 asin_2W = asin_W, f1, asin_W
nop.i 999
}
// Step 1.3
// Get w
// BmWW = B - W*W.
{ .mfi
nop.m 999
(p0) fnma.s1 asin_BmWW = asin_W, asin_W, asin_B
nop.i 999 ;;
}
// Step 2
// asin(x) = atan2(X,sqrt(1-X*X))
// = atan2(X, W) -Xw
// corr = Xw
// asin(x) = Z_hi + (s_lo*Z_lo - corr)
// Call atan2(X, W)
// Save W in f9
// Save X in f12
// Save w in f13
{ .mfi
nop.m 999
(p0) fmerge.se f9 = asin_W, asin_W
nop.i 999 ;;
}
// w = ((B - W*W) + Bb) * (approximation of 1/(2W)).
{ .mfi
nop.m 999
(p0) fma.s1 asin_BmWWpb = asin_BmWW, f1, asin_Bb
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p0) frcpa.s1 asin_1d2W,p9 = f1, asin_2W
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p0) fma.s1 asin_Ww = asin_BmWWpb, asin_1d2W, f0
nop.i 999 ;;
}
.endp asinl
ASM_SIZE_DIRECTIVE(asinl)
// __libm_callout: entered by falling through from the end of asinl (case 3).
// On entry f8 = |x|, f9 = W and f12 = normalized x.  It passes w in f13,
// calls __libm_atan2_reg, subtracts the correction |x|*w from the returned
// value and gives the result the sign of x.
.proc __libm_callout
__libm_callout:
.prologue
{ .mfi
nop.m 0
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
};;
{ .mfi
mov GR_SAVE_GP=gp // Save gp
nop.f 0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
}
.body
// f13 = w (asin_Ww), set in the same bundle as the call.
{.mfb
nop.m 0
(p0) fmerge.se f13 = asin_Ww, asin_Ww
(p0) br.call.sptk.many b0=__libm_atan2_reg#
};;
// NOTE(review): this reads f13 and asin_ABS_NORM_f8 after the call, so it
// assumes __libm_atan2_reg leaves both untouched -- confirm against that
// routine.
{ .mfi
mov gp = GR_SAVE_GP // Restore gp
(p0) fma.s1 asin_XWw = asin_ABS_NORM_f8,f13,f0
mov b0 = GR_SAVE_B0 // Restore return address
};;
// asin_XWw = Xw = corr
// asin_low = (s_lo * Z_lo - corr)
// f8 = Z_hi + (s_lo * Z_lo - corr)
{ .mfi
nop.m 999
(p0) fms.s1 asin_low = f11, f10, asin_XWw
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
};;
// The final sum is computed under status field s0.
{ .mfi
nop.m 999
(p0) fma.s0 f8 = f8, f1, asin_low
nop.i 999 ;;
}
// Take the sign from f12 (the normalized input) and the rest from f8.
{ .mfb
nop.m 999
(p0) fmerge.s f8 = f12,f8
(p0) br.ret.sptk b0 ;;
}
.endp __libm_callout
ASM_SIZE_DIRECTIVE(__libm_callout)
// SPECIAL / L(ASIN_ERROR_RETURN): reached from asinl when the unbiased
// exponent of x is >= 0 (case 4).  Handled in order:
//   |x| == 1  -> returns x*pi_by_2_lo + (pi_by_2 with the sign of x)
//   x is NaN  -> returns x*1+0
//   otherwise -> sets error tag 60 and FR_RESULT (f10) = frcpa(0/0), then
//                falls through into __libm_error_region below.
.proc SPECIAL
SPECIAL:
L(ASIN_ERROR_RETURN):
// If X is 1, return (sign of X)pi/2
{ .mfi
nop.m 999
(p0) fcmp.eq.unc p6,p7 = asin_ABS_NORM_f8,f1
nop.i 999 ;;
}
// r40 was left pointing at the pi_by_2_lo table entry by the post-increment
// loads in asinl.  The fmerge copies the sign of x onto the high part.
{ .mfb
(p6) ldfe asin_pi_by_2_lo = [r40]
(p6) fmerge.s asin_pi_by_2 = f8,asin_pi_by_2
nop.b 0;;
}
// If X is a NAN, leave
// qnan snan inf norm unorm 0 -+
// 1 1 0 0 0 0 11
// (The table above describes the 0xc3 mask of the fclass two bundles
// below; this first bundle still belongs to the |x| = 1 exit.)
{ .mfb
nop.m 999
(p6) fma.s0 f8 = f8,asin_pi_by_2_lo,asin_pi_by_2
(p6) br.ret.spnt b0
}
{ .mfi
nop.m 999
(p0) fclass.m.unc p12,p0 = f8, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p12) fma.s0 f8 = f8,f1,f0
(p12) br.ret.spnt b0 ;;
}
// Remaining inputs: set the tag and the provisional result for the error
// handler.
{ .mfi
(p0) mov GR_Parameter_TAG = 60
(p0) frcpa f10, p6 = f0, f0
nop.i 0
};;
.endp SPECIAL
ASM_SIZE_DIRECTIVE(SPECIAL)
// __libm_error_region: entered by falling through from SPECIAL, with
// GR_Parameter_TAG and FR_RESULT already set.  It opens a 64-byte stack
// frame, stores FR_X, FR_Y and FR_RESULT on the stack in extended format,
// calls __libm_error_support, and returns in f8 whatever is then found in
// the result slot.
.proc __libm_error_region
__libm_error_region:
.prologue
// GR_Parameter_Y = (entry sp) - 32; ar.pfs is saved.
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// The post-increment leaves GR_Parameter_Y at (new sp) + 48, the slot that
// is used for the result below.
{ .mmi
stfe [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
// After the store, GR_Parameter_Y is moved back by 16 to (new sp) + 32,
// the slot that received FR_Y above.
{ .mib
stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Recompute the result address from sp after the call (see the 8/15/00
// history note: GR_Parameter_RESULT was previously overwritten).
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// Both callees are defined elsewhere; only their symbol type and global
// binding are declared here.
.type __libm_error_support#,@function
.global __libm_error_support#
.type __libm_atan2_reg#,@function
.global __libm_atan2_reg#

1124
sysdeps/ia64/fpu/e_atan2.S Normal file

File diff suppressed because it is too large Load Diff

907
sysdeps/ia64/fpu/e_atan2f.S Normal file
View File

@ -0,0 +1,907 @@
.file "atan2f.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 6/1/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// History
//==============================================================
// 6/01/00 Initial version
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 8/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
// 1/05/01 Fixed flag settings for denormal input.
// 1/19/01 Added documentation
// 1/30/01 Improved speed
// Description
//=========================================
// The atan2 function computes the principal value of the arc tangent of y/x using
// the signs of both arguments to determine the quadrant of the return value.
// A domain error may occur if both arguments are zero.
// The atan2 function returns the arc tangent of y/x in the range [-pi,+pi] radians.
//..
//..Let (v,u) = (y,x) if |y| <= |x|, and (v,u) = (x,y) otherwise. Note that
//..v and u can be negative. We state the relationship between atan2(y,x) and
//..atan(v/u).
//..
//..Let swap = false if v = y, and swap = true if v = x.
//..Define C according to the matrix
//..
//.. TABLE FOR C
//.. x +ve x -ve
//.. no swap (swap = false) sgn(y)*0 sgn(y)*pi
//.. swap (swap = true ) sgn(y)*pi/2 sgn(y)*pi/2
//..
//.. atan2(y,x) = C + atan(v/u) if no swap
//.. atan2(y,x) = C - atan(v/u) if swap
//..
//..This relationship is more efficient to compute as we accommodate signs in v and u
//..saving the need to obtain the absolute value before computation can proceed.
//..
//..Suppose (v,u) = (y,x), we calculate atan(v/u) as follows:
//..A = y * frcpa(x) (so A = (y/x)(1 - beta))
//..atan(y/x) = atan(A) + atan( ((y/x)-A)/(1 + (y/x)A) ), the second term is
//..a correction.
//..atan(A) is approximated by a polynomial
//..A + p1 A^3 + p2 A^5 + ... + p10 A^21,
//..atan(G) is approximated as follows:
//..Let G = (y - Ax)/(x + Ay), atan(G) can be approximated by G + g * p1
//..where g is a limited precision approximation to G via g = (y - Ax)*frcpa(x + Ay).
//..
//..Suppose (v,u) = (x,y), we calculate atan(v/u) as follows:
//..Z = x * frcpa(y) (so Z = (x/y)(1 - beta))
//..atan(x/y) = atan(Z) + atan( ((x/y)-Z)/(1 + (x/y)Z) ), the second term is
//..a correction.
//..atan(Z) is approximated by a polynomial
//..Z + p1 Z^3 + p2 Z^5 + ... + p10 Z^21,
//..atan(T) is approximated as follows:
//..Let T = (x - Zy)/(y + Zx), atan(T) can be approximated by T + t * p1
//..where t is a limited precision approximation to T via t = (x - Zy)*frcpa(y + Zx).
//..
//..
//..A = y * frcpa(x)
//..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
//..
//..This polynomial is computed as follows:
//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
//..
//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
//..poly_A1 = poly_A2 + A4 * poly_A1
//..poly_A1 = poly_A3 + A4 * poly_A1
//..
//..poly_A4 = p1 * A
//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
//..poly_A5 = p2 + Asq * poly_A5
//..poly_A4 = poly_A4 + A5 * poly_A5
//..
//..atan_A = poly_A4 + A11 * poly_A1
//..
//..atan(G) is approximated as follows:
//..G_numer = y - A*x, G_denom = x + A*y
//..H1 = frcpa(G_denom)
//..H_beta = 1 - H1 * G_denom
//..H2 = H1 + H1 * H_beta
//..H_beta2 = H_beta*H_beta
//..H3 = H2 + H2*H_beta2
//..g = H1 * G_numer; gsq = g*g; atan_G = g*p1, atan_G = atan_G*gsq
//..atan_G = G_numer*H3 + atan_G
//..
//..
//..A = y * frcpa(x)
//..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
//..
//..This polynomial is computed as follows:
//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
//..
//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
//..poly_A1 = poly_A2 + A4 * poly_A1
//..poly_A1 = poly_A3 + A4 * poly_A1
//..
//..poly_A4 = p1 * A
//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
//..poly_A5 = p2 + Asq * poly_A5
//..poly_A4 = poly_A4 + A5 * poly_A5
//..
//..atan_A = poly_A4 + A11 * poly_A1
//..
//..
//..====================================================================
//.. COEFFICIENTS USED IN THE COMPUTATION
//..====================================================================
//coef_pj, j = 1,2,...,10; atan(A) ~=~ A + p1 A^3 + p2 A^5 + ... + p10 A^21
//
// coef_p1 = -.3333332707155439167401311806315789E+00
// coef_p1 in dbl = BFD5 5555 1219 1621
//
// coef_p2 = .1999967670926658391827857030875748E+00
// coef_p2 in dbl = 3FC9 997E 7AFB FF4E
//
// coef_p3 = -.1427989384500152360161563301087296E+00
// coef_p3 in dbl = BFC2 473C 5145 EE38
//
// coef_p4 = .1105852823460720770079031213661163E+00
// coef_p4 in dbl = 3FBC 4F51 2B18 65F5
//
// coef_p5 = -.8811839915595312348625710228448363E-01
// coef_p5 in dbl = BFB6 8EED 6A8C FA32
//
// coef_p6 = .6742329836955067042153645159059714E-01
// coef_p6 in dbl = 3FB1 42A7 3D7C 54E3
//
// coef_p7 = -.4468571068774672908561591262231909E-01
// coef_p7 in dbl = BFA6 E10B A401 393F
//
// coef_p8 = .2252333246746511135532726960586493E-01
// coef_p8 in dbl = 3F97 105B 4160 F86B
//
// coef_p9 = -.7303884867007574742501716845542314E-02
// coef_p9 in dbl = BF7D EAAD AA33 6451
//
// coef_p10 = .1109686868355312093949039454619058E-02
// coef_p10 in dbl = 3F52 2E5D 33BC 9BAA
//
// Special values
//==============================================================
// Y x Result
// +number +inf +0
// -number +inf -0
// +number -inf +pi
// -number -inf -pi
//
// +inf +number +pi/2
// -inf +number -pi/2
// +inf -number +pi/2
// -inf -number -pi/2
//
// +inf +inf +pi/4
// -inf +inf -pi/4
// +inf -inf +3pi/4
// -inf -inf -3pi/4
//
// +1 +1 +pi/4
// -1 +1 -pi/4
// +1 -1 +3pi/4
// -1 -1 -3pi/4
//
// +number +0 +pi/2 // does not raise DBZ
// -number +0 -pi/2 // does not raise DBZ
// +number -0 +pi/2 // does not raise DBZ
// -number -0 -pi/2 // does not raise DBZ
//
// +0 +number +0
// -0 +number -0
// +0 -number +pi
// -0 -number -pi
//
// +0 +0 +0 // does not raise invalid
// -0 +0 -0 // does not raise invalid
// +0 -0 +pi // does not raise invalid
// -0 -0 -pi // does not raise invalid
//
// Nan anything quiet Y
// anything NaN quiet X
// atan2(+-0/+-0) sets double error tag to 37
// atan2f(+-0/+-0) sets single error tag to 38
// These are domain errors.
#include "libm_support.h"
//
// Assembly macros
//=========================================
// integer registers
// Register aliases for atan2f. The procedure allocates its frame with
// alloc r32 = ar.pfs,1,5,4,0, so r33-r37 are locals and r38-r41 are outputs.
//
// Pointers used to walk atan2f_coef_table1 and atan2f_coef_table2
atan2f_GR_Addr_1 = r33
atan2f_GR_Addr_2 = r34
// Copies of b0, ar.pfs and gp kept across the call in __libm_error_region
GR_SAVE_B0 = r35
GR_SAVE_PFS = r36
GR_SAVE_GP = r37
// Output registers prepared in __libm_error_region before the call to
// __libm_error_support: addresses of the two stored arguments, address of
// the stored result, and the error tag
GR_Parameter_X = r38
GR_Parameter_Y = r39
GR_Parameter_RESULT = r40
GR_Parameter_TAG = r41
// floating point registers
// Polynomial coefficients, listed in the order the ldfpd pairs fill them
atan2f_coef_p1 = f32
atan2f_coef_p10 = f33
atan2f_coef_p7 = f34
atan2f_coef_p6 = f35
atan2f_coef_p3 = f36
atan2f_coef_p2 = f37
atan2f_coef_p9 = f38
atan2f_coef_p8 = f39
atan2f_coef_p5 = f40
atan2f_coef_p4 = f41
// Constants loaded from the tail of atan2f_coef_table2
atan2f_const_piby2 = f42
atan2f_const_pi = f43
atan2f_const_piby4 = f44
atan2f_const_3piby4 = f45
// x*x, y*y and x*y
atan2f_xsq = f46
atan2f_ysq = f47
atan2f_xy = f48
// Signed zero when x >= 0, atan2f_sgn_Y when x < 0
atan2f_const_1 = f49
// 1.0 carrying the sign of y
atan2f_sgn_Y = f50
// frcpa approximations: Z0 ~ 1/y, A0 ~ 1/x, then Z = Z0*x and A = A0*y
atan2f_Z0 = f51
atan2f_A0 = f52
atan2f_Z = f53
atan2f_A = f54
// Additive constant: sgn_Y*pi/2 when |y| > |x|, const_1*pi otherwise
atan2f_C = f55
// Polynomial argument (-Z or A) and its even powers
atan2f_U = f56
atan2f_Usq = f57
atan2f_U4 = f58
atan2f_U6 = f59
atan2f_U8 = f60
// Partial sums of the polynomial in Usq; the digits name the coefficients
// each partial sum already contains
atan2f_poly_u109 = f61
atan2f_poly_u87 = f62
atan2f_poly_u65 = f63
atan2f_poly_u43 = f64
atan2f_poly_u21 = f65
atan2f_poly_u10to7 = f66
atan2f_poly_u6to3 = f67
atan2f_poly_u10to3 = f68
atan2f_poly_u10to0 = f69
atan2f_poly_u210 = f70
// Numerators and denominators of the correction term: T_* is selected when
// |y| > |x|, G_* otherwise, and the selected pair is copied into R_*
atan2f_T_numer = f71
atan2f_T_denom = f72
atan2f_G_numer = f73
atan2f_G_denom = f74
atan2f_p1rnum = f75
atan2f_R_denom = f76
atan2f_R_numer = f77
atan2f_pR = f78
atan2f_pRC = f79
atan2f_pQRC = f80
// Successive refinements of the reciprocal of R_denom
atan2f_Q1 = f81
atan2f_Q_beta = f82
atan2f_Q2 = f83
atan2f_Q_beta2 = f84
atan2f_Q3 = f85
// r = Q1*R_numer and its square
atan2f_r = f86
atan2f_rsq = f87
// Not referenced in the visible atan2f code
atan2f_poly_atan_U = f88
// predicate registers
//atan2f_Pred_Swap = p6 // |y| > |x|
//atan2f_Pred_noSwap = p7 // |y| <= |x|
//atan2f_Pred_Xpos = p8 // x >= 0
//atan2f_Pred_Xneg = p9 // x < 0
// Constant tables for atan2f. Every entry is an IEEE double bit pattern and
// is read two at a time with ldfpd.
// LAYOUT DEPENDENCY: atan2f forms the address of atan2f_coef_table2 as
// atan2f_coef_table1 + 0x30, so table2 must directly follow the six
// 8-byte entries of table1. Do not reorder the tables or insert data
// between them.
.data
.align 16
atan2f_coef_table1:
ASM_TYPE_DIRECTIVE(atan2f_coef_table1,@object)
data8 0xBFD5555512191621 // p1
data8 0x3F522E5D33BC9BAA // p10
data8 0xBFA6E10BA401393F // p7
data8 0x3FB142A73D7C54E3 // p6
data8 0xBFC2473C5145EE38 // p3
data8 0x3FC9997E7AFBFF4E // p2
ASM_SIZE_DIRECTIVE(atan2f_coef_table1)
// Second table: remaining coefficients followed by the pi multiples used
// for the additive constant and for the special-case results
atan2f_coef_table2:
ASM_TYPE_DIRECTIVE(atan2f_coef_table2,@object)
data8 0xBF7DEAADAA336451 // p9
data8 0x3F97105B4160F86B // p8
data8 0xBFB68EED6A8CFA32 // p5
data8 0x3FBC4F512B1865F5 // p4
data8 0x3ff921fb54442d18 // pi/2
data8 0x400921fb54442d18 // pi
data8 0x3fe921fb54442d18 // pi/4
data8 0x4002d97c7f3321d2 // 3pi/4
ASM_SIZE_DIRECTIVE(atan2f_coef_table2)
// float atan2f(float y, float x)
// In:  f8 = y, f9 = x
// Out: f8 = result, rounded to single precision by the final fma.s.s0
// This part is the main path for x and y that are neither zero, infinite
// nor NaN. Any other input branches to ATAN2F_XY_INF_NAN_ZERO.
// Predicates: p8/p9 = x >= 0 / x < 0, p6/p7 = |y| > |x| / |y| <= |x|,
// p10/p11 = x / y is zero, infinite or NaN.
.global atan2f
#ifdef _LIBC
.global __atan2f
.global __ieee754_atan2f
#endif
.text
.align 32
atan2f:
.proc atan2f
#ifdef _LIBC
.proc __atan2f
__atan2f:
.proc __ieee754_atan2f
__ieee754_atan2f:
#endif
// Frame: 1 input, 5 locals (r33-r37), 4 outputs (r38-r41)
{ .mfi
alloc r32 = ar.pfs,1,5,4,0
frcpa.s1 atan2f_Z0,p0 = f1,f8 // Approx to 1/y
nop.i 999
}
// Fetch the address of atan2f_coef_table1 through its linkage table entry
{ .mfi
addl atan2f_GR_Addr_1 = @ltoff(atan2f_coef_table1),gp
fma.s1 atan2f_xsq = f9,f9,f0
nop.i 999 ;;
}
{ .mfi
ld8 atan2f_GR_Addr_1 = [atan2f_GR_Addr_1]
frcpa.s1 atan2f_A0,p0 = f1,f9 // Approx to 1/x
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 atan2f_ysq = f8,f8,f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
fcmp.ge.s1 p8,p9 = f9,f0 // Set p8 if x>=0, p9 if x<0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 atan2f_xy = f9,f8,f0
nop.i 999 ;;
}
// Addr_2 = table1 + 0x30 = start of atan2f_coef_table2.
// sgn_Y = 1.0 with the sign of y.
{ .mfi
add atan2f_GR_Addr_2 = 0x30, atan2f_GR_Addr_1
fmerge.s atan2f_sgn_Y = f8,f1
nop.i 999 ;;
}
// Load the coefficients pairwise from both tables while classifying x
{ .mmf
ldfpd atan2f_coef_p1,atan2f_coef_p10 = [atan2f_GR_Addr_1],16
ldfpd atan2f_coef_p9,atan2f_coef_p8 = [atan2f_GR_Addr_2],16
fclass.m p10,p0 = f9,0xe7 // Test x @inf|@snan|@qnan|@zero
}
;;
// T_denom = y + Z0*x*x
{ .mfi
ldfpd atan2f_coef_p7,atan2f_coef_p6 = [atan2f_GR_Addr_1],16
fma.s1 atan2f_T_denom = atan2f_Z0,atan2f_xsq,f8
nop.i 999
}
// Z = Z0*x, an approximation to x/y
{ .mfi
ldfpd atan2f_coef_p5,atan2f_coef_p4 = [atan2f_GR_Addr_2],16
fma.s1 atan2f_Z = atan2f_Z0,f9,f0
nop.i 999 ;;
}
// G_denom = x + A0*y*y
{ .mfi
ldfpd atan2f_coef_p3,atan2f_coef_p2 = [atan2f_GR_Addr_1],16
fma.s1 atan2f_G_denom = atan2f_A0,atan2f_ysq,f9
nop.i 999
}
// A = A0*y, an approximation to y/x
{ .mfi
ldfpd atan2f_const_piby2,atan2f_const_pi = [atan2f_GR_Addr_2],16
fma.s1 atan2f_A = atan2f_A0,f8,f0
nop.i 999 ;;
}
{ .mfi
ldfpd atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_2]
fclass.m p11,p0 = f8,0xe7 // Test y @inf|@snan|@qnan|@zero
nop.i 999
}
// T_numer = x - Z0*x*y. All constants the special path needs are loaded
// by the time this branch is taken.
{ .mfb
nop.m 999
fnma.s1 atan2f_T_numer = atan2f_Z0,atan2f_xy,f9
(p10) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on x nan,inf,zero
}
// p6 if |y|>|x|, p7 if |x|>=|y| , use xsq and ysq for test
{ .mfi
nop.m 999
fcmp.gt.s1 p6,p7 = atan2f_ysq,atan2f_xsq
nop.i 999
}
// G_numer = y - A0*x*y
{ .mfb
nop.m 999
fnma.s1 atan2f_G_numer = atan2f_A0,atan2f_xy,f8
(p11) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on y nan,inf,zero
}
// const_1 = sgn_Y*0 (zero) when x >= 0, sgn_Y when x < 0
{ .mfi
nop.m 999
(p8) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f0,f0
nop.i 999
}
{ .mfi
nop.m 999
(p9) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f1,f0
nop.i 999 ;;
}
// Select the polynomial argument: U = -Z when |y| > |x|, U = A otherwise
{ .mfi
nop.m 999
(p6) fnma.s1 atan2f_U = atan2f_Z,f1,f0
nop.i 999
}
{ .mfi
nop.m 999
(p6) fma.s1 atan2f_Usq = atan2f_Z,atan2f_Z,f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p7) fma.s1 atan2f_U = atan2f_A,f1,f0
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan2f_Usq = atan2f_A,atan2f_A,f0
nop.i 999 ;;
}
// Q1 = frcpa approximation to 1/R_denom, where R_denom is T_denom when
// |y| > |x| and G_denom otherwise
{ .mfi
nop.m 999
(p6) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_T_denom
nop.i 999
}
{ .mfi
nop.m 999
(p6) fma.s1 atan2f_R_denom = atan2f_T_denom,f1,f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p7) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_G_denom
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan2f_R_denom = atan2f_G_denom,f1,f0
nop.i 999 ;;
}
// R_numer = -T_numer when |y| > |x|, G_numer otherwise
{ .mfi
nop.m 999
(p6) fnma.s1 atan2f_R_numer = atan2f_T_numer,f1,f0
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan2f_R_numer = atan2f_G_numer,f1,f0
nop.i 999 ;;
}
// p1rnum = p1 * R_numer, with the same sign convention as R_numer
{ .mfi
nop.m 999
(p6) fnma.s1 atan2f_p1rnum = atan2f_T_numer,atan2f_coef_p1,f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p7) fma.s1 atan2f_p1rnum = atan2f_G_numer,atan2f_coef_p1,f0
nop.i 999 ;;
}
// Evaluate the polynomial in Usq as independent pairs:
// poly_uNM = p(M) + Usq*p(N)
{ .mfi
nop.m 999
fma.s1 atan2f_U4 = atan2f_Usq,atan2f_Usq,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u109 = atan2f_Usq,atan2f_coef_p10,atan2f_coef_p9
nop.i 999 ;;
}
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u87 = atan2f_Usq,atan2f_coef_p8,atan2f_coef_p7
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u65 = atan2f_Usq,atan2f_coef_p6,atan2f_coef_p5
nop.i 999 ;;
}
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u43 = atan2f_Usq,atan2f_coef_p4,atan2f_coef_p3
nop.i 999
}
// Q_beta = 1 - Q1*R_denom, the error of the reciprocal approximation
{ .mfi
nop.m 999
fnma.s1 atan2f_Q_beta = atan2f_Q1,atan2f_R_denom,f1
nop.i 999 ;;
}
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u21 = atan2f_Usq,atan2f_coef_p2,atan2f_coef_p1
nop.i 999
}
// r = Q1 * R_numer
{ .mfi
nop.m 999
fma.s1 atan2f_r = atan2f_Q1,atan2f_R_numer,f0
nop.i 999 ;;
}
// C = sgn_Y*pi/2 when |y| > |x|, const_1*pi otherwise
{ .mfi
nop.m 999
(p6) fma.s1 atan2f_C = atan2f_sgn_Y,atan2f_const_piby2,f0
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan2f_C = atan2f_const_1,atan2f_const_pi,f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
fma.s1 atan2f_U6 = atan2f_U4,atan2f_Usq,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 atan2f_U8 = atan2f_U4,atan2f_U4,f0
nop.i 999 ;;
}
// poly_u10to7 = poly_u87 + U4*poly_u109
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u10to7 = atan2f_U4,atan2f_poly_u109,atan2f_poly_u87
nop.i 999
}
// pR = p1rnum * Q1
{ .mfi
nop.m 999
fma.s1 atan2f_pR = atan2f_p1rnum,atan2f_Q1,f0
nop.i 999 ;;
}
// poly_u6to3 = poly_u43 + U4*poly_u65
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u6to3 = atan2f_U4,atan2f_poly_u65,atan2f_poly_u43
nop.i 999
}
// Q2 = Q1 + Q1*Q_beta, first refinement of the reciprocal
{ .mfi
nop.m 999
fma.s1 atan2f_Q2 = atan2f_Q1,atan2f_Q_beta,atan2f_Q1
nop.i 999 ;;
}
{ .mfi
nop.m 999
fma.s1 atan2f_Q_beta2 = atan2f_Q_beta,atan2f_Q_beta,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 atan2f_rsq = atan2f_r,atan2f_r,f0
nop.i 999 ;;
}
// poly_u210 = 1 + Usq*poly_u21
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u210 = atan2f_Usq,atan2f_poly_u21,f1
nop.i 999 ;;
}
// p8 is overwritten here; its last reader was the atan2f_const_1 setup
{ .mfi
nop.m 999
fcmp.eq.s0 p8,p0 = f8,f9 // Dummy op to set flag on denormal inputs
nop.i 999
}
// poly_u10to3 = poly_u6to3 + U8*poly_u10to7
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u10to3 = atan2f_U8,atan2f_poly_u10to7,atan2f_poly_u6to3
nop.i 999 ;;
}
// Q3 = Q2 + Q2*Q_beta^2, second refinement of the reciprocal
{ .mfi
nop.m 999
fma.s1 atan2f_Q3 = atan2f_Q2,atan2f_Q_beta2,atan2f_Q2
nop.i 999
}
// pRC = C + rsq*pR
{ .mfi
nop.m 999
fma.s1 atan2f_pRC = atan2f_rsq,atan2f_pR,atan2f_C
nop.i 999 ;;
}
// poly_u10to0 = poly_u210 + U6*poly_u10to3
{ .mfi
nop.m 999
fma.s1 atan2f_poly_u10to0 = atan2f_U6,atan2f_poly_u10to3,atan2f_poly_u210
nop.i 999 ;;
}
// pQRC = pRC + R_numer*Q3
{ .mfi
nop.m 999
fma.s1 atan2f_pQRC = atan2f_R_numer,atan2f_Q3,atan2f_pRC
nop.i 999 ;;
}
// Result = U*poly_u10to0 + pQRC, rounded to single in status field 0
{ .mfb
nop.m 999
fma.s.s0 f8 = atan2f_U,atan2f_poly_u10to0,atan2f_pQRC
br.ret.sptk b0 ;;
}
// Special-case path of atan2f, entered when x (f9) or y (f8) is zero,
// infinite or NaN. atan2f_sgn_Y and the four pi constants were set up on
// the main path before either branch here.
// Cases are resolved in this order: y NaN, x NaN, x = +inf, y = inf,
// x = -inf, then x or y zero. Only x = 0 with y = 0 leaves through
// __libm_error_region. Each test relies on the earlier cases having
// returned, so the order of the bundles must be kept.
ATAN2F_XY_INF_NAN_ZERO:
{ .mfi
nop.m 999
fclass.m p10,p0 = f8,0xc3 // Is y nan
nop.i 999
}
;;
{ .mfi
nop.m 999
fclass.m p12,p0 = f9,0xc3 // Is x nan
nop.i 999
}
;;
{ .mfi
nop.m 999
fclass.m p6,p0 = f9,0x21 // Is x +inf
nop.i 999
}
{ .mfb
nop.m 999
(p10) fma.s f8 = f9,f8,f0 // Result quietized y if y is nan
(p10) br.ret.spnt b0 // Exit if y is nan
}
;;
// .unc clears p7 and p8 when x is not +inf
{ .mfi
nop.m 999
(p6) fclass.m.unc p7,p8 = f8,0x23 // x +inf, is y inf
nop.i 999
}
{ .mfb
nop.m 999
(p12) fnorm.s f8 = f9 // Result quietized x if x is nan, y not nan
(p12) br.ret.spnt b0 // Exit if x is nan, y not nan
}
;;
// Here if x or y inf, or x or y zero
{ .mfi
nop.m 999
fcmp.eq.s0 p15,p0 = f8,f9 // Dummy op to set flag on denormal inputs
nop.i 999
}
;;
{ .mfi
nop.m 999
fclass.m p11,p12 = f9,0x22 // Is x -inf
nop.i 999
}
{ .mfb
nop.m 999
(p7) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
(p7) br.ret.spnt b0 // Exit if x +inf and y inf
}
;;
// fmerge.s takes the sign of y and the magnitude of f0 (zero)
{ .mfb
nop.m 999
(p8) fmerge.s f8 = f8,f0 // If x +inf and y not inf, result +-0
(p8) br.ret.spnt b0 // Exit if x +inf and y not inf
}
;;
{ .mfi
nop.m 999
(p12) fclass.m.unc p13,p0 = f8,0x23 // x not -inf, is y inf
nop.i 999
}
;;
// p14 = x -inf and y inf, p15 = x -inf and y not inf
{ .mfi
nop.m 999
(p11) fclass.m.unc p14,p15 = f8,0x23 // x -inf, is y inf
nop.i 999
}
;;
{ .mfi
nop.m 999
fclass.m p6,p7 = f9,0x7 // Is x zero
nop.i 999
}
{ .mfb
nop.m 999
(p13) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
(p13) br.ret.spnt b0 // Exit if x not -inf and y inf
}
;;
{ .mfi
nop.m 999
(p14) fma.s f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4
nop.i 999
}
{ .mfb
nop.m 999
(p15) fma.s f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi
(p11) br.ret.spnt b0 // Exit if x -inf
}
;;
// Here if x or y zero
// 0x19 selects positive normal and unnormal values: p8 = x > 0, p9 = x < 0
{ .mfi
nop.m 999
(p7) fclass.m.unc p8,p9 = f9,0x19 // x not zero, y zero, is x > zero
nop.i 999
}
;;
// p10 = x zero and y zero, p11 = x zero and y not zero
{ .mfi
nop.m 999
(p6) fclass.m.unc p10,p11 = f8,0x7 // x zero, is y zero
nop.i 999
}
;;
{ .mfi
nop.m 999
(p8) fmerge.s f8 = f8, f0 // x > zero and y zero, result is +-zero
nop.i 999
}
{ .mfb
nop.m 999
(p9) fma.s f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
(p10) br.cond.spnt __libm_error_region // Branch if x zero and y zero
}
;;
{ .mfb
nop.m 999
(p11) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero
br.ret.sptk b0 // Final special case exit
}
;;
.endp atan2f
ASM_SIZE_DIRECTIVE(atan2f)
// Error exit of atan2f, reached only when both x and y are zero.
// Computes the default result into f10, stores the two arguments and the
// result in a 64-byte stack frame, and calls __libm_error_support with
// their addresses and the error tag in the output registers r38-r41.
// Frame layout after sp is lowered by 64 (single precision slots):
//   sp+16  first argument  (f8, y)   address in GR_Parameter_X
//   sp+32  second argument (f9, x)   address in GR_Parameter_Y
//   sp+48  result          (f10)     address in GR_Parameter_RESULT
// The value returned in f8 is reloaded from the result slot after the call.
// NOTE(review): presumably __libm_error_support may rewrite that slot;
// its behaviour is not visible in this file.
.proc __libm_error_region
__libm_error_region:
.prologue
mov GR_Parameter_TAG = 38
fclass.m p10,p11 = f9,0x5 // @zero | @pos
;;
// Default result: zero with the sign of y when x is +0, else sgn_Y*pi
(p10) fmerge.s f10 = f8, f0
(p11) fma.s f10 = atan2f_sgn_Y, atan2f_const_pi,f0
;;
// GR_Parameter_Y = old sp - 32, which is new sp + 32 once sp is lowered
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 999
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
}
;;
// The post-increment leaves GR_Parameter_Y at sp+48, the result slot
{ .mmi
stfs [GR_Parameter_Y] = f9,16 // Store Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
}
;;
.body
{ .mib
stfs [GR_Parameter_X] = f8 // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
// The store reads GR_Parameter_Y before the add in the same group moves it
// back to sp+32
{ .mib
stfs [GR_Parameter_Y] = f10 // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
}
;;
// Recompute the result address after the call instead of reusing r40
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
}
;;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
}
;;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -0,0 +1 @@
/* Not needed. */

1142
sysdeps/ia64/fpu/e_cosh.S Normal file

File diff suppressed because it is too large Load Diff

1133
sysdeps/ia64/fpu/e_coshf.S Normal file

File diff suppressed because it is too large Load Diff

1150
sysdeps/ia64/fpu/e_coshl.S Normal file

File diff suppressed because it is too large Load Diff

815
sysdeps/ia64/fpu/e_exp.S Normal file
View File

@ -0,0 +1,815 @@
.file "exp.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00 Initial version
// 3/07/00 exp(inf) = inf but now does NOT call error support
// exp(-inf) = 0 but now does NOT call error support
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 11/30/00 Reworked to shorten main path, widen main path to include all
// args in normal range, and add quick exit for 0, nan, inf.
// 12/05/00 Loaded constants earlier with setf to save 2 cycles.
// API
//==============================================================
// double exp(double)
// Overview of operation
//==============================================================
// Take the input x. w is "how many log2/128 in x?"
// w = x * 128/log2
// n = int(w)
// x = n log2/128 + r + delta
// n = 128M + index_1 + 2^4 index_2
// x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta
// exp(x) = 2^M 2^(index_1/128) 2^(index_2/8) exp(r) exp(delta)
// Construct 2^M
// Get 2^(index_1/128) from table_1;
// Get 2^(index_2/8) from table_2;
// Calculate exp(r) by series
// r = x - n (log2/128)_high
// delta = - n (log2/128)_low
// Calculate exp(delta) as 1 + delta
// Special values
//==============================================================
// exp(+0) = 1.0
// exp(-0) = 1.0
// exp(+qnan) = +qnan
// exp(-qnan) = -qnan
// exp(+snan) = +qnan
// exp(-snan) = -qnan
// exp(-inf) = +0
// exp(+inf) = +inf
// Overflow and Underflow
//=======================
// exp(-x) = smallest double normal when
// x = -708.396 = c086232bdd7abcd2
// exp(x) = largest double normal when
// x = 709.7827 = 40862e42fefa39ef
// Registers used
//==============================================================
// Floating Point registers used:
// f8, input
// f6 -> f7, f9 -> f15, f32 -> f60
// General registers used:
// r2, r32 -> r60
// Predicate registers used:
// p6 -> p15
#include "libm_support.h"
// Assembly macros
//==============================================================
// Register aliases for exp. The procedure allocates its frame with
// alloc r32=ar.pfs,1,24,4,0, so r33-r56 are locals and r57-r60 are outputs.
//
// Bit pattern of the right-shift constant 1.1000 * 2^63
exp_GR_rshf = r33
// Table pointers: start of exp_table_1, exp_table_2 and exp_p_table
EXP_AD_TB1 = r34
EXP_AD_TB2 = r35
EXP_AD_P = r36
// N = rounded integer part of w, and the fields extracted from it
exp_GR_N = r37
exp_GR_index_1 = r38
exp_GR_index_2_16 = r39
exp_GR_biased_M = r40
exp_GR_index_1_16 = r41
// Addresses of the selected entries in table 1 and table 2
EXP_AD_T1 = r42
EXP_AD_T2 = r43
exp_GR_sig_inv_ln2 = r44
exp_GR_17ones = r45
exp_GR_one = r46
// Byte sizes of table 1 (0x100) and table 2 (0x80)
exp_TB1_size = r47
exp_TB2_size = r48
exp_GR_rshf_2to56 = r49
exp_GR_gt_ln = r50
exp_GR_exp_2tom56 = r51
exp_GR_17ones_m1 = r52
// Copies of b0, ar.pfs and gp kept across the call in __libm_error_region.
// GR_SAVE_SP is not referenced in the visible code.
GR_SAVE_B0 = r53
GR_SAVE_PFS = r54
GR_SAVE_GP = r55
GR_SAVE_SP = r56
// Output registers prepared for __libm_error_support
GR_Parameter_X = r57
GR_Parameter_Y = r58
GR_Parameter_RESULT = r59
GR_Parameter_TAG = r60
// Values handed to __libm_error_region: copy of the input, second argument
// (f1, since exp has a single argument) and the default result
FR_X = f10
FR_Y = f1
FR_RESULT = f8
// Scaled constants formed with setf instead of being loaded from memory
EXP_RSHF_2TO56 = f6
EXP_INV_LN2_2TO63 = f7
EXP_W_2TO56_RSH = f9
EXP_2TOM56 = f11
// Polynomial coefficients, loaded pairwise from exp_p_table
exp_P4 = f12
exp_P3 = f13
exp_P2 = f14
exp_P1 = f15
exp_ln2_by_128_hi = f33
exp_ln2_by_128_lo = f34
EXP_RSHF = f35
EXP_Nfloat = f36
exp_W = f37
// r = x - Nfloat*ln2_by_128_hi, f = 1 - Nfloat*ln2_by_128_lo
exp_r = f38
exp_f = f39
exp_rsq = f40
exp_rcube = f41
// 2^M and the scale factors S1 = 2^M * T1, S2 = f * T2
EXP_2M = f42
exp_S1 = f43
exp_T1 = f44
// Range thresholds, loaded from the head of exp_table_1
EXP_MIN_DBL_OFLOW_ARG = f45
EXP_MAX_DBL_ZERO_ARG = f46
EXP_MAX_DBL_NORM_ARG = f47
EXP_MAX_DBL_UFLOW_ARG = f48
EXP_MIN_DBL_NORM_ARG = f49
exp_rP4pP3 = f50
exp_P_lo = f51
exp_P_hi = f52
exp_P = f53
exp_S = f54
// Normalized copy of the input
EXP_NORM_f8 = f56
// Both trial results share f57. exp_wre_urm_f8 is written only on the
// possible-overflow path and exp_ftz_urm_f8 only on the possible-underflow
// path, so the two are never live together.
exp_wre_urm_f8 = f57
exp_ftz_urm_f8 = f57
exp_gt_pln = f58
exp_S2 = f59
exp_T2 = f60
// Data tables
//==============================================================
// Constant tables for exp. The code walks exp_table_1 with post-increment
// loads and then reaches exp_table_2 and exp_p_table by adding the fixed
// sizes 0x100 and 0x80, so the three tables must stay contiguous and in
// this order.
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
// double-extended 1/ln(2)
// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
// 3fff b8aa 3b29 5c17 f0bc
// For speed the significand will be loaded directly with a movl and setf.sig
// and the exponent will be bias+63 instead of bias+0. Thus subsequent
// computations need to scale appropriately.
// The constant 128/ln(2) is needed for the computation of w. This is also
// obtained by scaling the computations.
//
// Two shifting constants are loaded directly with movl and setf.d.
// 1. EXP_RSHF_2TO56 = 1.1000..00 * 2^(63-7)
// This constant is added to x*1/ln2 to shift the integer part of
// x*128/ln2 into the rightmost bits of the significand.
// The result of this fma is EXP_W_2TO56_RSH.
// NOTE(review): the bit pattern the code loads for this constant is
// 0x4768000000000000, i.e. 1.1000 * 2^(63+56), as the comment on that
// movl says. Confirm whether 2^(63-7) above is a leftover.
// 2. EXP_RSHF = 1.1000..00 * 2^(63)
// This constant is subtracted from EXP_W_2TO56_RSH * 2^(-56) to give
// the integer part of w, n, as a floating-point number.
// The result of this fms is EXP_Nfloat.
exp_table_1:
ASM_TYPE_DIRECTIVE(exp_table_1,@object)
// Five range thresholds as IEEE doubles plus one pad word (48 bytes)
data8 0x40862e42fefa39f0 // smallest dbl overflow arg
data8 0xc0874c0000000000 // approx largest arg for zero result
data8 0x40862e42fefa39ef // largest dbl arg to give normal dbl result
data8 0xc086232bdd7abcd3 // largest dbl underflow arg
data8 0xc086232bdd7abcd2 // smallest dbl arg to give normal dbl result
data8 0x0 // pad
// From here on each entry is 16 bytes, significand first then sign and
// exponent, and is read with ldfe
data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
// Table 1 is 2^(index_1/128) where
// index_1 goes from 0 to 15
data8 0x8000000000000000 , 0x00003FFF
data8 0x80B1ED4FD999AB6C , 0x00003FFF
data8 0x8164D1F3BC030773 , 0x00003FFF
data8 0x8218AF4373FC25EC , 0x00003FFF
data8 0x82CD8698AC2BA1D7 , 0x00003FFF
data8 0x8383594EEFB6EE37 , 0x00003FFF
data8 0x843A28C3ACDE4046 , 0x00003FFF
data8 0x84F1F656379C1A29 , 0x00003FFF
data8 0x85AAC367CC487B15 , 0x00003FFF
data8 0x8664915B923FBA04 , 0x00003FFF
data8 0x871F61969E8D1010 , 0x00003FFF
data8 0x87DB357FF698D792 , 0x00003FFF
data8 0x88980E8092DA8527 , 0x00003FFF
data8 0x8955EE03618E5FDD , 0x00003FFF
data8 0x8A14D575496EFD9A , 0x00003FFF
data8 0x8AD4C6452C728924 , 0x00003FFF
ASM_SIZE_DIRECTIVE(exp_table_1)
// Table 2 is 2^(index_2/8) where
// index_2 goes from 0 to 7
exp_table_2:
ASM_TYPE_DIRECTIVE(exp_table_2,@object)
data8 0x8000000000000000 , 0x00003FFF
data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
data8 0x9837F0518DB8A96F , 0x00003FFF
data8 0xA5FED6A9B15138EA , 0x00003FFF
data8 0xB504F333F9DE6484 , 0x00003FFF
data8 0xC5672A115506DADD , 0x00003FFF
data8 0xD744FCCAD69D6AF4 , 0x00003FFF
data8 0xEAC0C6E7DD24392F , 0x00003FFF
ASM_SIZE_DIRECTIVE (exp_table_2)
// Polynomial coefficients as IEEE doubles, read pairwise with ldfpd
exp_p_table:
ASM_TYPE_DIRECTIVE(exp_p_table,@object)
data8 0x3f8111116da21757 //P_4
data8 0x3fa55555d787761c //P_3
data8 0x3fc5555555555414 //P_2
data8 0x3fdffffffffffd6a //P_1
ASM_SIZE_DIRECTIVE(exp_p_table)
// double exp(double x)
// In:  f8 = x
// Out: f8 = exp(x), rounded to double precision
// Quick exits return directly for x = 0, -inf, +inf and NaN. All other
// arguments run the table-driven computation below. Arguments classified
// as certain or possible overflow or underflow leave the main path through
// the EXP_* labels that follow it.
// Predicates used for the range classification:
//   p11 certain underflow to zero   p12 certain underflow
//   p13 possible underflow          p14 possible overflow
//   p15 certain overflow
.align 32
.global exp#
.section .text
.proc exp#
.align 32
exp:
#ifdef _LIBC
.global __ieee754_exp#
__ieee754_exp:
#endif
// Frame: 1 input, 24 locals (r33-r56), 4 outputs (r57-r60)
{ .mlx
alloc r32=ar.pfs,1,24,4,0
movl exp_GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
}
{ .mlx
addl EXP_AD_TB1 = @ltoff(exp_table_1), gp
movl exp_GR_rshf_2to56 = 0x4768000000000000 ;; // 1.10000 2^(63+56)
}
;;
// We do this fnorm right at the beginning to take any enabled
// faults and to normalize any input unnormals so that SWA is not taken.
{ .mfi
ld8 EXP_AD_TB1 = [EXP_AD_TB1]
fclass.m p8,p0 = f8,0x07 // Test for x=0
mov exp_GR_17ones = 0x1FFFF
}
{ .mfi
mov exp_TB1_size = 0x100
fnorm EXP_NORM_f8 = f8
mov exp_GR_exp_2tom56 = 0xffff-56
}
;;
// Form two constants we need
// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
{ .mmf
setf.sig EXP_INV_LN2_2TO63 = exp_GR_sig_inv_ln2 // form 1/ln2 * 2^63
setf.d EXP_RSHF_2TO56 = exp_GR_rshf_2to56 // Form const 1.100 * 2^(63+56)
fclass.m p9,p0 = f8,0x22 // Test for x=-inf
}
;;
{ .mlx
setf.exp EXP_2TOM56 = exp_GR_exp_2tom56 // form 2^-56 for scaling Nfloat
movl exp_GR_rshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
}
{ .mfb
mov exp_TB2_size = 0x80
(p8) fma.d f8 = f1,f1,f0 // quick exit for x=0
(p8) br.ret.spnt b0
;;
}
// The five loads below step EXP_AD_TB1 through the thresholds and the two
// ln2/128 parts at the head of exp_table_1
{ .mfi
ldfpd EXP_MIN_DBL_OFLOW_ARG, EXP_MAX_DBL_ZERO_ARG = [EXP_AD_TB1],16
fclass.m p10,p0 = f8,0x21 // Test for x=+inf
nop.i 999
}
{ .mfb
nop.m 999
(p9) fma.d f8 = f0,f0,f0 // quick exit for x=-inf
(p9) br.ret.spnt b0
;;
}
{ .mmf
ldfpd EXP_MAX_DBL_NORM_ARG, EXP_MAX_DBL_UFLOW_ARG = [EXP_AD_TB1],16
setf.d EXP_RSHF = exp_GR_rshf // Form right shift const 1.100 * 2^63
fclass.m p11,p0 = f8,0xc3 // Test for x=nan
;;
}
// f8 still holds +inf here, so it is returned unchanged
{ .mfb
ldfd EXP_MIN_DBL_NORM_ARG = [EXP_AD_TB1],16
nop.f 999
(p10) br.ret.spnt b0 // quick exit for x=+inf
;;
}
{ .mfi
ldfe exp_ln2_by_128_hi = [EXP_AD_TB1],16
nop.f 999
nop.i 999
;;
}
// For a NaN input the result is the copy produced by the fnorm above
{ .mfb
ldfe exp_ln2_by_128_lo = [EXP_AD_TB1],16
(p11) fmerge.s f8 = EXP_NORM_f8, EXP_NORM_f8
(p11) br.ret.spnt b0 // quick exit for x=nan
;;
}
// After that last load, EXP_AD_TB1 points to the beginning of table 1
// W = X * Inv_log2_by_128
// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
{ .mfi
nop.m 999
fma.s1 EXP_W_2TO56_RSH = EXP_NORM_f8, EXP_INV_LN2_2TO63, EXP_RSHF_2TO56
nop.i 999
;;
}
// Divide arguments into the following categories:
// Certain Underflow/zero p11 - -inf < x <= MAX_DBL_ZERO_ARG
// Certain Underflow p12 - MAX_DBL_ZERO_ARG < x <= MAX_DBL_UFLOW_ARG
// Possible Underflow p13 - MAX_DBL_UFLOW_ARG < x < MIN_DBL_NORM_ARG
// Certain Safe - MIN_DBL_NORM_ARG <= x <= MAX_DBL_NORM_ARG
// Possible Overflow p14 - MAX_DBL_NORM_ARG < x < MIN_DBL_OFLOW_ARG
// Certain Overflow p15 - MIN_DBL_OFLOW_ARG <= x < +inf
//
// If the input is really a double arg, then there will never be "Possible
// Underflow" or "Possible Overflow" arguments.
//
{ .mfi
add EXP_AD_TB2 = exp_TB1_size, EXP_AD_TB1
fcmp.ge.s1 p15,p14 = EXP_NORM_f8,EXP_MIN_DBL_OFLOW_ARG
nop.i 999
;;
}
{ .mfi
add EXP_AD_P = exp_TB2_size, EXP_AD_TB2
fcmp.le.s1 p11,p12 = EXP_NORM_f8,EXP_MAX_DBL_ZERO_ARG
nop.i 999
;;
}
{ .mfb
ldfpd exp_P4, exp_P3 = [EXP_AD_P] ,16
(p14) fcmp.gt.unc.s1 p14,p0 = EXP_NORM_f8,EXP_MAX_DBL_NORM_ARG
(p15) br.cond.spnt L(EXP_CERTAIN_OVERFLOW)
;;
}
// Nfloat = round_int(W)
// The significand of EXP_W_2TO56_RSH contains the rounded integer part of W,
// as a twos complement number in the lower bits (that is, it may be negative).
// That twos complement number (called N) is put into exp_GR_N.
// Since EXP_W_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
// before the shift constant 1.10000 * 2^63 is subtracted to yield EXP_Nfloat.
// Thus, EXP_Nfloat contains the floating point version of N
//
// The compare below also writes its complement into p13. After it,
// p12 = MAX_DBL_ZERO_ARG < x <= MAX_DBL_UFLOW_ARG and
// p13 = x > MAX_DBL_UFLOW_ARG, and .unc clears both when p12 was false.
// p13 has to be defined here because the next compare is qualified by it
// and nothing earlier in this procedure writes p13.
{ .mfi
nop.m 999
(p12) fcmp.le.unc p12,p13 = EXP_NORM_f8,EXP_MAX_DBL_UFLOW_ARG
nop.i 999
}
{ .mfb
ldfpd exp_P2, exp_P1 = [EXP_AD_P]
fms.s1 EXP_Nfloat = EXP_W_2TO56_RSH, EXP_2TOM56, EXP_RSHF
(p11) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW_ZERO)
;;
}
// p13 = MAX_DBL_UFLOW_ARG < x < MIN_DBL_NORM_ARG (possible underflow)
{ .mfi
getf.sig exp_GR_N = EXP_W_2TO56_RSH
(p13) fcmp.lt.unc p13,p0 = EXP_NORM_f8,EXP_MIN_DBL_NORM_ARG
nop.i 999
;;
}
// exp_GR_index_1 has index_1
// exp_GR_index_2_16 has index_2 * 16
// exp_GR_biased_M has M
// exp_GR_index_1_16 has index_1 * 16
// r2 has true M
{ .mfi
and exp_GR_index_1 = 0x0f, exp_GR_N
fnma.s1 exp_r = EXP_Nfloat, exp_ln2_by_128_hi, EXP_NORM_f8
shr r2 = exp_GR_N, 0x7
}
{ .mfi
and exp_GR_index_2_16 = 0x70, exp_GR_N
fnma.s1 exp_f = EXP_Nfloat, exp_ln2_by_128_lo, f1
nop.i 999
;;
}
// EXP_AD_T1 has address of T1
// EXP_AD_T2 has address of T2
{ .mmi
addl exp_GR_biased_M = 0xffff, r2
add EXP_AD_T2 = EXP_AD_TB2, exp_GR_index_2_16
shladd EXP_AD_T1 = exp_GR_index_1, 4, EXP_AD_TB1
;;
}
// Create Scale = 2^M
// r = x - Nfloat * ln2_by_128_hi
// f = 1 - Nfloat * ln2_by_128_lo
{ .mmi
setf.exp EXP_2M = exp_GR_biased_M
ldfe exp_T2 = [EXP_AD_T2]
nop.i 999
;;
}
// Load T1 and T2
{ .mfi
ldfe exp_T1 = [EXP_AD_T1]
nop.f 999
nop.i 999
;;
}
// Polynomial: P = P_hi + r^3 * P_lo with
// P_hi = r + P1*r^2 and P_lo = P2 + r*(P3 + r*P4)
{ .mfi
nop.m 999
fma.s1 exp_rsq = exp_r, exp_r, f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 exp_rP4pP3 = exp_r, exp_P4, exp_P3
nop.i 999
;;
}
{ .mfi
nop.m 999
fma.s1 exp_rcube = exp_r, exp_rsq, f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 exp_P_lo = exp_r, exp_rP4pP3, exp_P2
nop.i 999
;;
}
{ .mfi
nop.m 999
fma.s1 exp_P_hi = exp_rsq, exp_P1, exp_r
nop.i 999
}
// Scale: S = S1*S2 with S1 = 2^M * T1 and S2 = f * T2
{ .mfi
nop.m 999
fma.s1 exp_S2 = exp_f,exp_T2,f0
nop.i 999
;;
}
{ .mfi
nop.m 999
fma.s1 exp_S1 = EXP_2M,exp_T1,f0
nop.i 999
;;
}
{ .mfi
nop.m 999
fma.s1 exp_P = exp_rcube, exp_P_lo, exp_P_hi
nop.i 999
;;
}
{ .mfi
nop.m 999
fma.s1 exp_S = exp_S1,exp_S2,f0
nop.i 999
;;
}
// S and P are ready. Range cases finish on their own paths, tested in the
// order certain underflow, possible underflow, possible overflow.
{ .bbb
(p12) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW)
(p13) br.cond.spnt L(EXP_POSSIBLE_UNDERFLOW)
(p14) br.cond.spnt L(EXP_POSSIBLE_OVERFLOW)
;;
}
// Result = S + S*P, rounded to double
{ .mfb
nop.m 999
fma.d f8 = exp_S, exp_P, exp_S
br.ret.sptk b0 ;; // Normal path exit
}
// Possible-overflow path of exp, entered with exp_S and exp_P computed.
// Recomputes the result in status field 2 with modified controls, compares
// it against 2^1024, and either branches to the certain-overflow handler
// or returns the normally rounded double result.
L(EXP_POSSIBLE_OVERFLOW):
// We got an answer. EXP_MAX_DBL_NORM_ARG < x < EXP_MIN_DBL_OFLOW_ARG
// overflow is a possibility, not a certainty
// NOTE(review): 0x42 presumably sets the widest-range-exponent control in
// status field 2, as the WRE remark below suggests. Confirm against the
// fsetc definition.
{ .mfi
nop.m 999
fsetc.s2 0x7F,0x42
nop.i 999 ;;
}
{ .mfi
nop.m 999
fma.d.s2 exp_wre_urm_f8 = exp_S, exp_P, exp_S
nop.i 999 ;;
}
// We define an overflow when the answer with
// WRE set
// user-defined rounding mode
// is ldn +1
// Is the exponent 1 more than the largest double?
// If so, go to ERROR RETURN, else get the answer and
// leave.
// Largest double is 7FE (biased double)
// 7FE - 3FF + FFFF = 103FE
// Create + largest_double_plus_ulp
// Create - largest_double_plus_ulp
// Calculate answer with WRE set.
// Cases when answer is ldn+1 are as follows:
// ldn ldn+1
// --+----------|----------+------------
// |
// +inf +inf -inf
// RN RN
// RZ
// Put the status field 2 controls back to 0x40 and build the comparison
// value: exponent 0x103ff is one above the largest double exponent 0x103FE
{ .mfi
nop.m 999
fsetc.s2 0x7F,0x40
mov exp_GR_gt_ln = 0x103ff ;;
}
{ .mfi
setf.exp exp_gt_pln = exp_GR_gt_ln
nop.f 999
nop.i 999 ;;
}
// p6 = trial result >= 2^1024, i.e. the double result overflows
{ .mfi
nop.m 999
fcmp.ge.unc.s1 p6, p0 = exp_wre_urm_f8, exp_gt_pln
nop.i 999 ;;
}
{ .mfb
nop.m 999
nop.f 999
(p6) br.cond.spnt L(EXP_CERTAIN_OVERFLOW) ;; // Branch if really overflow
}
{ .mfb
nop.m 999
fma.d f8 = exp_S, exp_P, exp_S
br.ret.sptk b0 ;; // Exit if really no overflow
}
// Certain-overflow path of exp. Copies the input to FR_X, forces an
// overflowing double operation to produce the default result and raise the
// flags, then enters __libm_error_region with error tag 14.
L(EXP_CERTAIN_OVERFLOW):
// exp_GR_17ones - r0 - 1 = 0x1FFFE, used as the biased exponent of f9
{ .mmi
sub exp_GR_17ones_m1 = exp_GR_17ones, r0, 1 ;;
setf.exp f9 = exp_GR_17ones_m1
nop.i 999 ;;
}
// FR_X keeps the original argument. f8 is read here before the fma.d in
// the same group replaces it with the result.
{ .mfi
nop.m 999
fmerge.s FR_X = f8,f8
nop.i 999
}
{ .mfb
mov GR_Parameter_TAG = 14
fma.d FR_RESULT = f9, f9, f0 // Set I,O and +INF result
br.cond.sptk __libm_error_region ;;
}
// Possible-underflow path of exp, entered with exp_S and exp_P computed.
// Recomputes the result in status field 2 with modified controls. If that
// trial result is zero the certain-underflow handler is used, otherwise
// the normally rounded double result is returned.
L(EXP_POSSIBLE_UNDERFLOW):
// We got an answer. EXP_MAX_DBL_UFLOW_ARG < x < EXP_MIN_DBL_NORM_ARG
// underflow is a possibility, not a certainty
// We define an underflow when the answer with
// ftz set
// is zero (tiny numbers become zero)
// Notice (from below) that if we have an unlimited exponent range,
// then there is an extra machine number E between the largest denormal and
// the smallest normal.
// So if with unbounded exponent we round to E or below, then we are
// tiny and underflow has occurred.
// But notice that you can be in a situation where we are tiny, namely
// rounded to E, but when the exponent is bounded we round to smallest
// normal. So the answer can be the smallest normal with underflow.
// E
// -----+--------------------+--------------------+-----
// | | |
// 1.1...10 2^-3fff 1.1...11 2^-3fff 1.0...00 2^-3ffe
// 0.1...11 2^-3ffe (biased, 1)
// largest dn smallest normal
// NOTE(review): 0x41 presumably sets the flush-to-zero control in status
// field 2, as the ftz remark above suggests. Confirm against the fsetc
// definition.
{ .mfi
nop.m 999
fsetc.s2 0x7F,0x41
nop.i 999 ;;
}
{ .mfi
nop.m 999
fma.d.s2 exp_ftz_urm_f8 = exp_S, exp_P, exp_S
nop.i 999 ;;
}
// Put the status field 2 controls back to 0x40
{ .mfi
nop.m 999
fsetc.s2 0x7F,0x40
nop.i 999 ;;
}
// p6 = trial result is zero, i.e. the result is tiny
{ .mfi
nop.m 999
fcmp.eq.unc.s1 p6, p0 = exp_ftz_urm_f8, f0
nop.i 999 ;;
}
{ .mfb
nop.m 999
nop.f 999
(p6) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW) ;; // Branch if really underflow
}
{ .mfb
nop.m 999
fma.d f8 = exp_S, exp_P, exp_S
br.ret.sptk b0 ;; // Exit if really no underflow
}
// Certain-underflow path of exp with a nonzero tiny result. Copies the
// input to FR_X, computes S + S*P in double precision as the default
// result, then enters __libm_error_region with error tag 15.
L(EXP_CERTAIN_UNDERFLOW):
// f8 is read here before the fma.d in the same group overwrites it
{ .mfi
nop.m 999
fmerge.s FR_X = f8,f8
nop.i 999
}
{ .mfb
mov GR_Parameter_TAG = 15
fma.d FR_RESULT = exp_S, exp_P, exp_S // Set I,U and tiny result
br.cond.sptk __libm_error_region ;;
}
// Certain-underflow path of exp for arguments at or below
// EXP_MAX_DBL_ZERO_ARG. Squares a value with biased exponent 1 in double
// precision to obtain the default result and raise the flags, then enters
// __libm_error_region with error tag 15.
L(EXP_CERTAIN_UNDERFLOW_ZERO):
{ .mmi
mov exp_GR_one = 1 ;;
setf.exp f9 = exp_GR_one
nop.i 999 ;;
}
// f8 is read here before the fma.d in the same group overwrites it
{ .mfi
nop.m 999
fmerge.s FR_X = f8,f8
nop.i 999
}
{ .mfb
mov GR_Parameter_TAG = 15
fma.d FR_RESULT = f9, f9, f0 // Set I,U and tiny (+0.0) result
br.cond.sptk __libm_error_region ;;
}
.endp exp
ASM_SIZE_DIRECTIVE(exp)
// Error exit shared by the overflow and underflow paths of exp.
// On entry FR_X (f10) holds the original argument, FR_RESULT (f8) the
// default result and GR_Parameter_TAG the error tag. The values are stored
// in a 64-byte stack frame and their addresses passed to
// __libm_error_support in the output registers r57-r60.
// Frame layout after sp is lowered by 64 (double precision slots):
//   sp+16  first argument  (FR_X)       address in GR_Parameter_X
//   sp+32  second argument (FR_Y = f1)  address in GR_Parameter_Y
//   sp+48  result          (FR_RESULT)  address in GR_Parameter_RESULT
// The value returned in f8 is reloaded from the result slot after the call.
// NOTE(review): presumably __libm_error_support may rewrite that slot;
// its behaviour is not visible in this file.
.proc __libm_error_region
__libm_error_region:
.prologue
// GR_Parameter_Y = old sp - 32, which is new sp + 32 once sp is lowered
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// The post-increment leaves GR_Parameter_Y at sp+48, the result slot
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
// The store reads GR_Parameter_Y before the add in the same group moves it
// back to sp+32
{ .mib
stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Recompute the result address after the call. The file history notes
// that GR_Parameter_RESULT was previously overwritten by then.
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

768
sysdeps/ia64/fpu/e_expf.S Normal file
View File

@ -0,0 +1,768 @@
.file "expf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// History
//==============================================================
// 4/04/00 Unwind update
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 8/21/00 Improvements to save 2 cycles on main path, and shorten x=0 case
// 12/07/00 Widen main path, shorten x=inf, nan paths
//
#include "libm_support.h"
// Assembly macros
//==============================================================
// integer registers used
// Symbolic names for the stacked general registers. expf allocates
// 1 input (r32, which receives ar.pfs), 26 locals (r33-r58) and
// 4 outputs (r59-r62) with its alloc instruction.
// Nibble masks used to split the table index (exp_GR_Mint + 128).
exp_GR_0x0f = r33
exp_GR_0xf0 = r34
// Table addresses. EXP_AD_P_2 is defined but not referenced by the code below.
EXP_AD_P_1 = r36
EXP_AD_P_2 = r37
EXP_AD_T1 = r38
EXP_AD_T2 = r39
// Integer part of x, the biased index and the two table entry addresses.
exp_GR_Mint = r40
exp_GR_Mint_p_128 = r41
exp_GR_Ind1 = r42
EXP_AD_M1 = r43
exp_GR_Ind2 = r44
EXP_AD_M2 = r45
// Single precision bit patterns of the range thresholds (moved to FP
// registers with setf.s).
exp_GR_min_oflow = r46
exp_GR_max_zero = r47
exp_GR_max_norm = r48
exp_GR_max_uflow = r49
exp_GR_min_norm = r50
// Exponent field constants used to force overflow / underflow results.
exp_GR_17ones = r51
exp_GR_gt_ln = r52
// NOTE: r53 is shared with GR_SAVE_B0 below. exp_GR_T2_size is only read
// when EXP_AD_T1 is formed on the main path; GR_SAVE_B0 is only written
// inside __libm_error_region, so the two uses do not overlap.
exp_GR_T2_size = r53
exp_GR_17ones_m1 = r56
exp_GR_one = r57
// Registers used by __libm_error_region to preserve b0, ar.pfs and gp
// across the call to __libm_error_support.
GR_SAVE_B0 = r53
GR_SAVE_PFS = r55
GR_SAVE_GP = r54
// Output registers r59-r62: stack addresses of x, y and the result, plus
// the error tag, as set up in __libm_error_region.
GR_Parameter_X = r59
GR_Parameter_Y = r60
GR_Parameter_RESULT = r61
GR_Parameter_TAG = r62
// FR_X receives a copy of the input before FR_RESULT (f8) is overwritten.
// FR_Y is f1 because expf has a single argument; the error region still
// stores it into the second parameter slot.
FR_X = f10
FR_Y = f1
FR_RESULT = f8
// floating point registers used
// f11-f15: range thresholds loaded from the integer bit patterns above.
EXP_MIN_SGL_OFLOW_ARG = f11
EXP_MAX_SGL_ZERO_ARG = f12
EXP_MAX_SGL_NORM_ARG = f13
EXP_MAX_SGL_UFLOW_ARG = f14
EXP_MIN_SGL_NORM_ARG = f15
// Polynomial coefficients, in the order they are loaded pairwise by ldfpd.
exp_coeff_P5 = f32
exp_coeff_P6 = f33
exp_coeff_P3 = f34
exp_coeff_P4 = f35
exp_coeff_P1 = f36
exp_coeff_P2 = f37
// Argument reduction: integer part (as fixed point and as float) and R.
exp_Mx = f38
exp_Mfloat = f39
exp_R = f40
// Intermediate terms of the polynomial evaluation.
exp_P1 = f41
exp_P2 = f42
exp_P3 = f43
exp_Rsq = f44
exp_R4 = f45
exp_P4 = f46
exp_P5 = f47
exp_P6 = f48
exp_P7 = f49
// Table values and reconstruction: T = T1 * T2, A = T * (1 + R).
exp_T1 = f50
exp_T2 = f51
exp_T = f52
exp_A = f53
// Copy of the input produced by fnorm, used for all range comparisons.
exp_norm_f8 = f54
// Trial results computed in status field s2 (wre / ftz set) and the
// lsn+1 comparison value used by the possible-overflow path.
exp_wre_urm_f8 = f55
exp_ftz_urm_f8 = f56
exp_gt_pln = f57
// Constant tables for expf.
// LAYOUT DEPENDENCY: expf walks exp_coeff_1_table, exp_coeff_2_table and
// exp_T2_table with one post-incremented pointer (three ldfpd loads with a
// 16-byte increment each) and then locates exp_T1_table by adding 0x100 to
// the start of exp_T2_table. The four tables must therefore stay adjacent
// and in this order, and exp_T2_table must stay 16 entries of 16 bytes.
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
// Double precision polynomial coefficients, stored in load order
// (p5,p6 then p1,p2); p3,p4 follow in exp_coeff_2_table.
exp_coeff_1_table:
ASM_TYPE_DIRECTIVE(exp_coeff_1_table,@object)
data8 0x3F56F35FDE4F8563 // p5
data8 0x3F2A378BEFECCFDD // p6
data8 0x3FE00000258C581D // p1
data8 0x3FC555557AE7B3D4 // p2
ASM_SIZE_DIRECTIVE(exp_coeff_1_table)
exp_coeff_2_table:
ASM_TYPE_DIRECTIVE(exp_coeff_2_table,@object)
data8 0x3FA5551BB6592FAE // p3
data8 0x3F8110E8EBFFD485 // p4
ASM_SIZE_DIRECTIVE(exp_coeff_2_table)
// T2: indexed by the low nibble of (M + 128). Each entry is a significand
// followed by a sign/exponent word, read with ldfe, on a 16-byte boundary.
exp_T2_table:
ASM_TYPE_DIRECTIVE(exp_T2_table,@object)
data8 0xa175cf9cd7d85844 , 0x00003f46 // exp(-128)
data8 0xdb7279415a1f9eed , 0x00003f47 // exp(-127)
data8 0x95213b242bd8ca5f , 0x00003f49 // exp(-126)
data8 0xcab03c968c989f83 , 0x00003f4a // exp(-125)
data8 0x89bdb674702961ad , 0x00003f4c // exp(-124)
data8 0xbb35a2eec278be35 , 0x00003f4d // exp(-123)
data8 0xfe71b17f373e7e7a , 0x00003f4e // exp(-122)
data8 0xace9a6ec52a39b63 , 0x00003f50 // exp(-121)
data8 0xeb03423fe393cf1c , 0x00003f51 // exp(-120)
data8 0x9fb52c5bcaef1693 , 0x00003f53 // exp(-119)
data8 0xd910b6377ed60bf1 , 0x00003f54 // exp(-118)
data8 0x9382dad8a9fdbfe4 , 0x00003f56 // exp(-117)
data8 0xc87d0a84dea869a3 , 0x00003f57 // exp(-116)
data8 0x883efb4c6d1087b0 , 0x00003f59 // exp(-115)
data8 0xb92d7373dce9a502 , 0x00003f5a // exp(-114)
data8 0xfbaeb020577fb0cb , 0x00003f5b // exp(-113)
ASM_SIZE_DIRECTIVE(exp_T2_table)
// T1: indexed by the high nibble of (M + 128), same entry format as T2.
// T1[hi] * T2[lo] = exp(16*hi + lo - 128) = exp(M).
exp_T1_table:
ASM_TYPE_DIRECTIVE(exp_T1_table,@object)
data8 0x8000000000000000 , 0x00003fff // exp(16 * 0)
data8 0x87975e8540010249 , 0x00004016 // exp(16 * 1)
data8 0x8fa1fe625b3163ec , 0x0000402d // exp(16 * 2)
data8 0x9826b576512a59d7 , 0x00004044 // exp(16 * 3)
data8 0xa12cc167acbe6902 , 0x0000405b // exp(16 * 4)
data8 0xaabbcdcc279f59e4 , 0x00004072 // exp(16 * 5)
data8 0xb4dbfaadc045d16f , 0x00004089 // exp(16 * 6)
data8 0xbf95e372ccdbf146 , 0x000040a0 // exp(16 * 7)
data8 0xcaf2a62eea10bbfb , 0x000040b7 // exp(16 * 8)
data8 0xd6fbeb62fddbd340 , 0x000040ce // exp(16 * 9)
data8 0xe3bbee32e4a440ea , 0x000040e5 // exp(16 * 10)
data8 0xf13d8517c34199a8 , 0x000040fc // exp(16 * 11)
data8 0xff8c2b166241eedd , 0x00004113 // exp(16 * 12)
data8 0x875a04c0b38d6129 , 0x0000412b // exp(16 * 13)
data8 0x8f610127db6774d7 , 0x00004142 // exp(16 * 14)
data8 0x97e1dd87e5c20bb6 , 0x00004159 // exp(16 * 15)
ASM_SIZE_DIRECTIVE(exp_T1_table)
// Argument Reduction
// exp_Mx = (int)f8 ==> The value of f8 rounded to int is placed into the
// significand of exp_Mx as a two's
// complement number.
// Later we want to have exp_Mx in a general register. Do this with a getf.sig
// and call the general register exp_GR_Mint
// exp_Mfloat = (float)(int)f8 ==> the two's complement number in
// significand of exp_Mx is turned
// into a floating point number.
// R = f8 - exp_Mfloat ==> reduced argument (computed as 1 * f8 - exp_Mfloat)
// Core Approximation
// Calculate a series in R
// R * p6 + p5
// R * p4 + p3
// R * p2 + p1
// R^2
// R^4
// R^2(R * p6 + p5) + (R * p4 + p3)
// R^2(R * p2 + p1)
// R^4(R^2(R * p6 + p5) + (R * p4 + p3)) + (R^2(R * p2 + p1))
// R + 1
// exp(R) = (1 + R) + R^4(R^2(R * p6 + p5) + (R * p4 + p3)) + (R^2(R * p2 + p1))
// exp(R) = 1 + R + R^2 * p1 + R^3 * p2 + R^4 * p3 + R^5 * p4 + R^6 * p5 + R^7 * p6
// Reconstruction
// significand of exp_Mx is two's complement,
// -103 < x < 89
// The smallest single denormal is 2^-149 = ssdn
// For e^x = ssdn
// x = log(ssdn) = -103.279
// But with rounding result goes to ssdn until -103.972079
// The largest single normal is 1.<23 1's> 2^126 ~ 2^127 = lsn
// For e^x = lsn
// x = log(lsn) = 88.7228
//
// expf overflows when x >= 42b17218 = 88.7228
// expf returns largest single denormal when x = c2aeac50
// expf goes to zero when x < c2cff1b5
// Consider range of 8-bit two's complement, -128 ---> 127
// Add 128; range becomes 0 ---> 255
// The number (=i) in 0 ---> 255 is used as offset into two tables.
// i = abcd efgh = abcd * 16 + efgh = i1 * 16 + i2
// i1 = (exp_GR_Mint + 128) & 0xf0 (the mask 0xf0 is held in register exp_GR_0xf0,
// so the AND does not need an 8-bit two's complement immediate)
// i1 = i1 + start of T1 table (EXP_AD_T1)
// Note that the entries in T1 are double-extended numbers on 16-byte boundaries
// and that i1 is already a multiple of 16 (i.e. shifted left by 4) after the AND.
// i2 must be shifted left by 4 before adding to the start of the table.
// i2 = ((exp_GR_Mint + 128) & 0x0f) << 4
// i2 = i2 + start of T2 table (EXP_AD_T2)
// T = T1 * T2
// A = T * (1 + R)
// answer = T * (R^2 * p1 + R^3 * p2 + R^4 * p3 + R^5 * p4 + R^6 * p5 + R^7 * p6) +
// T * (1 + R)
// = T * exp(R)
//==============================================================
// float expf(float x)   (also exported as __ieee754_expf when _LIBC is defined)
// In:   f8 = x
// Out:  f8 = exp(x)
// Quick exits: x = 0 returns 1, x = -inf returns +0, x = +inf returns x,
//              x = NaN returns the fnorm copy of x.
// Main path:   M = x converted to integer, R = x - M,
//              T = T1[hi nibble of M+128] * T2[lo nibble of M+128],
//              result = T * (1 + R) + T * poly(R).
// Predicates:  p6 x=0, p7 x=-inf, p8 x=+inf, p9 x=NaN,
//              p11 certain underflow to zero, p12 certain underflow,
//              p13 possible underflow, p14 possible overflow,
//              p15 certain overflow.
// Range errors are reported through __libm_error_region.
//==============================================================
.global expf#
.section .text
.proc expf#
.align 32
expf:
#ifdef _LIBC
.global __ieee754_expf#
__ieee754_expf:
#endif
{ .mfi
alloc r32 = ar.pfs,1,26,4,0
fcvt.fx.s1 exp_Mx = f8
mov exp_GR_17ones = 0x1FFFF
}
{ .mlx
addl EXP_AD_P_1 = @ltoff(exp_coeff_1_table),gp
movl exp_GR_min_oflow = 0x42b17218
}
;;
// Fnorm done to take any enabled faults
{ .mfi
ld8 EXP_AD_P_1 = [EXP_AD_P_1]
fclass.m p6,p0 = f8, 0x07 //@zero
nop.i 999
}
{ .mfi
add exp_GR_max_norm = -1, exp_GR_min_oflow // 0x42b17217
fnorm exp_norm_f8 = f8
nop.i 999
}
;;
{ .mfi
setf.s EXP_MIN_SGL_OFLOW_ARG = exp_GR_min_oflow // 0x42b17218
fclass.m p7,p0 = f8, 0x22 // Test for x=-inf
mov exp_GR_0xf0 = 0x0f0
}
{ .mlx
setf.s EXP_MAX_SGL_NORM_ARG = exp_GR_max_norm
movl exp_GR_max_zero = 0xc2cff1b5
}
;;
{ .mlx
mov exp_GR_0x0f = 0x00f
movl exp_GR_max_uflow = 0xc2aeac50
}
{ .mfb
nop.m 999
(p6) fma.s f8 = f1,f1,f0
(p6) br.ret.spnt b0 // quick exit for x=0
}
;;
{ .mfi
setf.s EXP_MAX_SGL_ZERO_ARG = exp_GR_max_zero
fclass.m p8,p0 = f8, 0x21 // Test for x=+inf
adds exp_GR_min_norm = 1, exp_GR_max_uflow // 0xc2aeac51
}
{ .mfb
ldfpd exp_coeff_P5,exp_coeff_P6 = [EXP_AD_P_1],16
(p7) fma.s f8 = f0,f0,f0
(p7) br.ret.spnt b0 // quick exit for x=-inf
}
;;
{ .mmf
ldfpd exp_coeff_P1,exp_coeff_P2 = [EXP_AD_P_1],16
setf.s EXP_MAX_SGL_UFLOW_ARG = exp_GR_max_uflow
fclass.m p9,p0 = f8, 0xc3 // Test for x=nan
}
;;
{ .mmb
ldfpd exp_coeff_P3,exp_coeff_P4 = [EXP_AD_P_1],16
setf.s EXP_MIN_SGL_NORM_ARG = exp_GR_min_norm
(p8) br.ret.spnt b0 // quick exit for x=+inf
}
;;
// EXP_AD_P_1 now points to exp_T2_table
// (this relies on the coefficient tables and exp_T2_table being adjacent)
{ .mfi
mov exp_GR_T2_size = 0x100
fcvt.xf exp_Mfloat = exp_Mx
nop.i 999
}
;;
{ .mfb
getf.sig exp_GR_Mint = exp_Mx
(p9) fmerge.s f8 = exp_norm_f8, exp_norm_f8
(p9) br.ret.spnt b0 // quick exit for x=nan
}
;;
{ .mmi
nop.m 999
mov EXP_AD_T2 = EXP_AD_P_1
add EXP_AD_T1 = exp_GR_T2_size,EXP_AD_P_1 ;;
}
// Table index i = M + 128; the high nibble selects the T1 entry and the
// low nibble selects the T2 entry.
{ .mmi
adds exp_GR_Mint_p_128 = 0x80,exp_GR_Mint ;;
and exp_GR_Ind1 = exp_GR_Mint_p_128, exp_GR_0xf0
and exp_GR_Ind2 = exp_GR_Mint_p_128, exp_GR_0x0f ;;
}
// Divide arguments into the following categories:
// Certain Underflow/zero p11 - -inf < x <= MAX_SGL_ZERO_ARG
// Certain Underflow p12 - MAX_SGL_ZERO_ARG < x <= MAX_SGL_UFLOW_ARG
// Possible Underflow p13 - MAX_SGL_UFLOW_ARG < x < MIN_SGL_NORM_ARG
// Certain Safe - MIN_SGL_NORM_ARG <= x <= MAX_SGL_NORM_ARG
// Possible Overflow p14 - MAX_SGL_NORM_ARG < x < MIN_SGL_OFLOW_ARG
// Certain Overflow p15 - MIN_SGL_OFLOW_ARG <= x < +inf
//
// If the input is really a single arg, then there will never be "Possible
// Underflow" or "Possible Overflow" arguments.
//
{ .mfi
add EXP_AD_M1 = exp_GR_Ind1,EXP_AD_T1
fcmp.ge.s1 p15,p14 = exp_norm_f8,EXP_MIN_SGL_OFLOW_ARG
nop.i 999
}
// R = x - (float)M
{ .mfi
shladd EXP_AD_M2 = exp_GR_Ind2,4,EXP_AD_T2
fms.s1 exp_R = f1,f8,exp_Mfloat
nop.i 999 ;;
}
{ .mfi
ldfe exp_T1 = [EXP_AD_M1]
fcmp.le.s1 p11,p12 = exp_norm_f8,EXP_MAX_SGL_ZERO_ARG
nop.i 999 ;;
}
{ .mfb
ldfe exp_T2 = [EXP_AD_M2]
(p14) fcmp.gt.s1 p14,p0 = exp_norm_f8,EXP_MAX_SGL_NORM_ARG
(p15) br.cond.spnt L(EXP_CERTAIN_OVERFLOW) ;;
}
// The complement of this compare is written to p13 so that p13 is defined
// (x > MAX_SGL_UFLOW_ARG) before it qualifies the compare that follows.
// Previously p13 was never written before being used as a qualifying
// predicate, so the Possible Underflow class depended on the value the
// caller left in that scratch predicate.
// When p12 is false here the compare is skipped and p13 stays undefined,
// but then p11 is true and the branch below leaves the main path.
{ .mfb
nop.m 999
(p12) fcmp.le.s1 p12,p13 = exp_norm_f8,EXP_MAX_SGL_UFLOW_ARG
(p11) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW_ZERO)
}
;;
{ .mfi
nop.m 999
(p13) fcmp.lt.s1 p13,p0 = exp_norm_f8,EXP_MIN_SGL_NORM_ARG
nop.i 999
}
;;
// Polynomial evaluation:
// exp_P3 = R*p2 + p1, exp_P1 = R*p6 + p5, exp_P2 = R*p4 + p3
{ .mfi
nop.m 999
fma.s1 exp_Rsq = exp_R,exp_R,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 exp_P3 = exp_R,exp_coeff_P2,exp_coeff_P1
nop.i 999
}
;;
{ .mfi
nop.m 999
fma.s1 exp_P1 = exp_R,exp_coeff_P6,exp_coeff_P5
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 exp_P2 = exp_R,exp_coeff_P4,exp_coeff_P3
nop.i 999
}
;;
// exp_P7 = 1 + R
{ .mfi
nop.m 999
fma.s1 exp_P7 = f1,exp_R,f1
nop.i 999
}
;;
// exp_P5 = R^2 * (R*p2 + p1), exp_R4 = R^4
{ .mfi
nop.m 999
fma.s1 exp_P5 = exp_Rsq,exp_P3,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 exp_R4 = exp_Rsq,exp_Rsq,f0
nop.i 999
}
;;
// exp_T = T1 * T2, exp_P4 = R^2 * (R*p6 + p5) + (R*p4 + p3)
{ .mfi
nop.m 999
fma.s1 exp_T = exp_T1,exp_T2,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 exp_P4 = exp_Rsq,exp_P1,exp_P2
nop.i 999
}
;;
// exp_A = T * (1 + R), exp_P6 = R^4 * exp_P4 + exp_P5
{ .mfi
nop.m 999
fma.s1 exp_A = exp_T,exp_P7,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 exp_P6 = exp_R4,exp_P4,exp_P5
nop.i 999
}
;;
// Leave the main path for the range classes that need flag or error handling.
{ .bbb
(p12) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW)
(p13) br.cond.spnt L(EXP_POSSIBLE_UNDERFLOW)
(p14) br.cond.spnt L(EXP_POSSIBLE_OVERFLOW)
}
;;
// Safe range: result = T * exp_P6 + exp_A, rounded to single precision.
{ .mfb
nop.m 999
fma.s f8 = exp_T,exp_P6,exp_A
br.ret.sptk b0
}
;;
L(EXP_POSSIBLE_OVERFLOW):
// Entered from the main path with exp_T, exp_P6 and exp_A already computed.
// We got an answer. EXP_MAX_SGL_NORM_ARG < x < EXP_MIN_SGL_OFLOW_ARG
// overflow is a possibility, not a certainty
// Set wre in s2 and perform the last operation with s2
// We define an overflow when the answer with
// WRE set
// user-defined rounding mode
// is lsn +1
// Is the exponent 1 more than the largest single?
// If so, go to ERROR RETURN, else (no overflow) get the answer and
// leave.
// Largest single is FE (biased single)
// FE - 7F + FFFF = 1007E
// Create + largest_single_plus_ulp
// Create - largest_single_plus_ulp
// Calculate answer with WRE set.
// Cases when answer is lsn+1 are as follows:
// midpoint
// |
// lsn | lsn+1
// --+----------|----------+------------
// |
// +inf +inf -inf
// RN RN
// RZ
// exp_gt_pln contains the floating point number lsn+1.
// The setf.exp puts 0x1007f in the exponent and 0x800... in the significand.
// If the answer is >= lsn+1, we have overflowed.
// Then p6 is TRUE. Set the overflow tag, save input in FR_X,
// do the final calculation for IEEE result, and branch to error return.
// fsetc.s2 0x7F,0x42: load the s2 controls from the main status field
// controls (AND mask 0x7F) with the bits in 0x42 forced on; per the
// comment above this enables wre for the trial computation.
{ .mfi
mov exp_GR_gt_ln = 0x1007F
fsetc.s2 0x7F,0x42
nop.i 999
}
;;
// Trial result in s2 with widest range exponent; f8 is not touched yet.
{ .mfi
setf.exp exp_gt_pln = exp_GR_gt_ln
fma.s.s2 exp_wre_urm_f8 = exp_T, exp_P6, exp_A
nop.i 999
}
;;
// Put s2 back to the 0x40 setting (wre off again).
{ .mfi
nop.m 999
fsetc.s2 0x7F,0x40
nop.i 999
}
;;
// p6 = trial result >= lsn+1, i.e. the single precision result overflows.
{ .mfi
nop.m 999
fcmp.ge.unc.s1 p6, p0 = exp_wre_urm_f8, exp_gt_pln
nop.i 999
}
;;
{ .mfb
nop.m 999
nop.f 999
(p6) br.cond.spnt L(EXP_CERTAIN_OVERFLOW) // Branch if really overflow
}
;;
// No overflow: compute the final result in the default status field.
{ .mfb
nop.m 999
fma.s f8 = exp_T, exp_P6, exp_A
br.ret.sptk b0 // Exit if really no overflow
}
;;
L(EXP_CERTAIN_OVERFLOW):
// Reached from the main path (p15) or from EXP_POSSIBLE_OVERFLOW (p6).
// Build f9 with exponent field 0x1FFFE (0x1FFFF - 0 - 1) so that the single
// precision product f9*f9 overflows and raises the flags.
{ .mmi
sub exp_GR_17ones_m1 = exp_GR_17ones, r0, 1 ;;
setf.exp f9 = exp_GR_17ones_m1
nop.i 999 ;;
}
// Keep a copy of the input for the error handler before f8 is overwritten.
{ .mfi
nop.m 999
fmerge.s FR_X = f8,f8
nop.i 999
}
// Tag 16 is the value passed to __libm_error_support for this case.
{ .mfb
mov GR_Parameter_TAG = 16
fma.s FR_RESULT = f9, f9, f0 // Set I,O and +INF result
br.cond.sptk __libm_error_region ;;
}
L(EXP_POSSIBLE_UNDERFLOW):
// Entered from the main path with exp_T, exp_P6 and exp_A already computed.
// We got an answer. EXP_MAX_SGL_UFLOW_ARG < x < EXP_MIN_SGL_NORM_ARG
// underflow is a possibility, not a certainty
// We define an underflow when the answer with
// ftz set
// is zero (tiny numbers become zero)
// Notice (from below) that if we have an unlimited exponent range,
// then there is an extra machine number E between the largest denormal and
// the smallest normal.
// So if with unbounded exponent we round to E or below, then we are
// tiny and underflow has occurred.
// But notice that you can be in a situation where we are tiny, namely
// rounded to E, but when the exponent is bounded we round to smallest
// normal. So the answer can be the smallest normal with underflow.
// E
// -----+--------------------+--------------------+-----
// | | |
// 1.1...10 2^-7f 1.1...11 2^-7f 1.0...00 2^-7e
// 0.1...11 2^-7e (biased, 1)
// largest dn smallest normal
// If the answer is = 0, we have underflowed.
// Then p6 is TRUE. Set the underflow tag, save input in FR_X,
// do the final calculation for IEEE result, and branch to error return.
// fsetc.s2 0x7F,0x41: load the s2 controls from the main status field
// controls (AND mask 0x7F) with the bits in 0x41 forced on; per the
// comment above this enables ftz for the trial computation.
{ .mfi
nop.m 999
fsetc.s2 0x7F,0x41
nop.i 999
}
;;
// Trial result in s2 with flush-to-zero; f8 is not touched yet.
{ .mfi
nop.m 999
fma.s.s2 exp_ftz_urm_f8 = exp_T, exp_P6, exp_A
nop.i 999
}
;;
// Put s2 back to the 0x40 setting (ftz off again).
{ .mfi
nop.m 999
fsetc.s2 0x7F,0x40
nop.i 999
}
;;
// p6 = trial result was flushed to zero, i.e. the result is tiny.
{ .mfi
nop.m 999
fcmp.eq.unc.s1 p6, p0 = exp_ftz_urm_f8, f0
nop.i 999
}
;;
{ .mfb
nop.m 999
nop.f 999
(p6) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW) // Branch if really underflow
}
;;
// No underflow: compute the final result in the default status field.
{ .mfb
nop.m 999
fma.s f8 = exp_T, exp_P6, exp_A
br.ret.sptk b0 // Exit if really no underflow
}
;;
L(EXP_CERTAIN_UNDERFLOW):
// Reached from the main path (p12) or from EXP_POSSIBLE_UNDERFLOW (p6);
// exp_T, exp_P6 and exp_A are already computed on both routes.
// Keep a copy of the input for the error handler before f8 is overwritten.
{ .mfi
nop.m 999
fmerge.s FR_X = f8,f8
nop.i 999
}
// Tag 17 is the value passed to __libm_error_support for underflow.
{ .mfb
mov GR_Parameter_TAG = 17
fma.s FR_RESULT = exp_T, exp_P6, exp_A // Set I,U and tiny result
br.cond.sptk __libm_error_region ;;
}
L(EXP_CERTAIN_UNDERFLOW_ZERO):
// Reached from the main path (p11) before the polynomial is evaluated, so
// the result is produced from a constant instead: f9 gets exponent field 1
// (a very small number) and the single precision product f9*f9 underflows.
{ .mmi
mov exp_GR_one = 1 ;;
setf.exp f9 = exp_GR_one
nop.i 999 ;;
}
// Keep a copy of the input for the error handler before f8 is overwritten.
{ .mfi
nop.m 999
fmerge.s FR_X = f8,f8
nop.i 999
}
{ .mfb
mov GR_Parameter_TAG = 17
fma.s FR_RESULT = f9, f9, f0 // Set I,U and tiny (+0.0) result
br.cond.sptk __libm_error_region ;;
}
.endp expf
ASM_SIZE_DIRECTIVE(expf)
// __libm_error_region (expf)
// Entered by branch (not call) from expf with FR_X = input, FR_RESULT = the
// IEEE result and GR_Parameter_TAG = error tag. Builds a 64-byte frame,
// stores the single precision values at
//   sp+16 = x, sp+32 = y (FR_Y), sp+48 = result
// passes their addresses in GR_Parameter_X/Y/RESULT, calls
// __libm_error_support, reloads the result from sp+48 into f8 and returns
// to the caller of expf.
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 999
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// GR_Parameter_Y is new sp+32 here; the post-increment leaves it at sp+48.
{ .mmi
stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mfi
stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
nop.f 0
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
}
// The store uses the old value of GR_Parameter_Y (sp+48); the add in the
// same bundle then moves GR_Parameter_Y back to the y slot at sp+32.
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Recompute the result address after the call instead of relying on the
// output register still holding it.
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -0,0 +1 @@
/* Not needed. */

538
sysdeps/ia64/fpu/e_fmod.S Normal file
View File

@ -0,0 +1,538 @@
.file "fmod.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
// Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//====================================================================
// 2/02/00 Initial version
// 3/02/00 New Algorithm
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//11/28/00 Set FR_Y to f9
//
// API
//====================================================================
// double fmod(double,double);
//
// Overview of operation
//====================================================================
// fmod(a,b)=a-i*b,
// where i is an integer such that, if b!=0,
// |i|<=|a/b| and |a/b-i|<1
//
// Algorithm
//====================================================================
// a). if |a|<|b|, return a
// b). get quotient and reciprocal overestimates accurate to
// 33 bits (q2,y2)
// c). if the exponent difference (exponent(a)-exponent(b))
// is less than 32, truncate quotient to integer and
// finish in one iteration
// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
// round quotient estimate to single precision (k=RN(q2)),
// calculate partial remainder (a'=a-k*b),
// get quotient estimate (a'*y2), and repeat from c).
//
// Special cases
//====================================================================
// b=+/-0: return NaN, call libm_error_support
// a=+/-Inf, a=NaN or b=NaN: return NaN
//
// Registers used
//====================================================================
// Predicate registers: p6-p11
// General registers: r2, r32 (ar.pfs), r33-r40
// Floating point registers: f6-f15
#include "libm_support.h"
.section .text
// Stacked registers: fmod allocates 1 input (r32), 4 locals (r33-r36) and
// 4 outputs (r37-r40).
// Locals used by __libm_error_region to preserve state across the call.
// GR_SAVE_SP is defined but not referenced in this file.
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
GR_SAVE_SP = r36
// Outputs: addresses of x, y and the result on the stack, plus the error tag.
GR_Parameter_X = r37
GR_Parameter_Y = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
// FP values handed to the error region: saved x, y and the result.
FR_X = f10
FR_Y = f9
FR_RESULT = f8
// double fmod(double a, double b)
// (also exported as __ieee754_fmod when _LIBC is defined)
// This part screens the special cases and prepares the values used by the
// reduction loop that follows:
//   f6 = |a|, f7 = |b|, f9 = |b| with the sign of a,
//   f10 = y2 (reciprocal estimate), f13 = q2 (quotient estimate),
//   f15 = 2^32, f12 = 1.25*2^-24, p6 = p10 = 0.
.proc fmod#
.align 32
.global fmod#
.align 32
fmod:
#ifdef _LIBC
.global __ieee754_fmod
.type __ieee754_fmod,@function
__ieee754_fmod:
#endif
// inputs in f8, f9
// result in f8
{ .mfi
alloc r32=ar.pfs,1,4,4,0
// f6=|a|
fmerge.s f6=f0,f8
// exponent field for 2^-34 (0xffdd - 0xffff = -34), used in step (5)
mov r2 = 0x0ffdd
}
{.mfi
nop.m 0
// f7=|b|
fmerge.s f7=f0,f9
nop.i 0;;
}
{ .mfi
setf.exp f11 = r2
// (1) y0
// p6 qualifies the refinement steps (2)-(8) below
frcpa.s1 f10,p6=f6,f7
nop.i 0
}
// Y +-NAN, +-inf, +-0? p7
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0xe7
nop.i 999;;
}
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999
}
// |x| < |y|? Return x p8
{ .mfi
nop.m 999
(p0) fcmp.lt.unc.s1 p8,p0 = f6,f7
nop.i 999 ;;
}
{ .mfi
nop.m 0
// normalize y (if |x|<|y|)
(p8) fma.s0 f9=f9,f1,f0
nop.i 0;;
}
{ .mfi
// exponent field for 2^32 (0x1001f - 0xffff = 32)
mov r2=0x1001f
// (2) q0=a*y0
(p6) fma.s1 f13=f6,f10,f0
nop.i 0
}
{ .mfi
nop.m 0
// (3) e0 = 1 - b * y0
(p6) fnma.s1 f12=f7,f10,f1
nop.i 0;;
}
{.mfi
nop.m 0
// normalize x (if |x|<|y|)
(p8) fma.d.s0 f8=f8,f1,f0
nop.i 0
}
// Special cases are dispatched in this order: x NaN/Inf, then y NaN/Inf/0,
// then the early return when |x|<|y|.
{.bbb
(p9) br.cond.spnt L(FMOD_X_NAN_INF)
(p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
// if |x|<|y|, return
(p8) br.ret.spnt b0;;
}
{.mfi
nop.m 0
// normalize x
fma.s0 f6=f6,f1,f0
nop.i 0
}
{.mfi
nop.m 0
// normalize y
fma.s0 f7=f7,f1,f0
nop.i 0;;
}
{.mfi
// f15=2^32
setf.exp f15=r2
// (4) q1=q0+e0*q0
(p6) fma.s1 f13=f12,f13,f13
nop.i 0
}
{ .mfi
nop.m 0
// (5) e1 = e0 * e0 + 2^-34
(p6) fma.s1 f14=f12,f12,f11
nop.i 0;;
}
{.mlx
nop.m 0
// single precision bit pattern of 1.25*2^-24
movl r2=0x33a00000;;
}
{ .mfi
nop.m 0
// (6) y1 = y0 + e0 * y0
(p6) fma.s1 f10=f12,f10,f10
nop.i 0;;
}
{.mfi
// set f12=1.25*2^{-24}
setf.s f12=r2
// (7) q2=q1+e1*q1
(p6) fma.s1 f13=f13,f14,f13
nop.i 0;;
}
{.mfi
nop.m 0
// f9 = |b| with the sign of a (used for the final correction)
fmerge.s f9=f8,f9
nop.i 0
}
{ .mfi
nop.m 0
// (8) y2 = y1 + e1 * y1
(p6) fma.s1 f10=f14,f10,f10
// set p6=0, p10=0
cmp.ne.and p6,p10=r0,r0;;
}
// Reduction loop.
// In:  f6 = current partial remainder (|a| on entry), f7 = |b|,
//      f10 = y2, f13 = quotient estimate, f15 = 2^32.
// Each pass sets p8 when the quotient estimate is below 2^32 (final pass)
// and p7 otherwise (another partial remainder step is needed).
// On the final pass the result is written to f8 and the routine returns.
.align 32
L(loop53):
{.mfi
nop.m 0
// compare q2, 2^32
fcmp.lt.unc.s1 p8,p7=f13,f15
nop.i 0
}
{.mfi
nop.m 0
// will truncate quotient to integer, if exponent<32 (in advance)
fcvt.fx.trunc.s1 f11=f13
nop.i 0;;
}
{.mfi
nop.m 0
// if exponent>32, round quotient to single precision (perform in advance)
fma.s.s1 f13=f13,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
// set f12=sgn(a)
// (f12 held 1.25*2^-24 until now; that constant is only needed on p7 passes)
(p8) fmerge.s f12=f8,f1
nop.i 0
}
{.mfi
nop.m 0
// normalize truncated quotient
(p8) fcvt.xf f13=f11
nop.i 0;;
}
{ .mfi
nop.m 0
// calculate remainder (assuming f13=RZ(Q))
(p7) fnma.s1 f14=f13,f7,f6
nop.i 0
}
{.mfi
nop.m 0
// also if exponent>32, round quotient to single precision
// and subtract 1 ulp: q=q-q*(1.25*2^{-24})
(p7) fnma.s.s1 f11=f13,f12,f13
nop.i 0;;
}
{.mfi
nop.m 0
// (p8) calculate remainder (82-bit format)
(p8) fnma.s1 f11=f13,f7,f6
nop.i 0
}
{.mfi
nop.m 0
// (p7) calculate remainder (assuming f11=RZ(Q))
(p7) fnma.s1 f6=f11,f7,f6
nop.i 0;;
}
{.mfi
nop.m 0
// Final iteration (p8): is f11 the correct remainder (quotient was not overestimated) ?
// p6 = remainder negative (quotient overestimated), p10 = remainder is correct
(p8) fcmp.lt.unc.s1 p6,p10=f11,f0
nop.i 0;;
}
{.mfi
nop.m 0
// get new quotient estimation: a'*y2
(p7) fma.s1 f13=f14,f10,f0
nop.i 0
}
{.mfb
nop.m 0
// was f14=RZ(Q) ? (then new remainder f14>=0)
// p7 = f14 negative (use the 1-ulp-smaller quotient), p9 = f14 is valid
(p7) fcmp.lt.unc.s1 p7,p9=f14,f0
nop.b 0;;
}
.pred.rel "mutex",p6,p10
{.mfb
nop.m 0
// add b to estimated remainder (to cover the case when the quotient was overestimated)
// also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
(p6) fma.d.s0 f8=f11,f12,f9
nop.b 0
}
{.mfb
nop.m 0
// calculate remainder (double precision)
// set correct sign of result before returning
(p10) fma.d.s0 f8=f11,f12,f0
(p8) br.ret.sptk b0;;
}
{.mfi
nop.m 0
// if f13!=RZ(Q), get alternative quotient estimation: a''*y2
(p7) fma.s1 f13=f6,f10,f0
nop.i 0
}
{.mfb
nop.m 0
// if f14 was RZ(Q), set remainder to f14
(p9) mov f6=f14
br.cond.sptk L(loop53);;
}
// x is NaN or infinity.
//   y == 0           -> handled by FMOD_Y_ZERO
//   x = Inf, y != NaN -> return the NaN produced by frcpa(x,x)
//   otherwise         -> return the result of frcpa(x,y)
L(FMOD_X_NAN_INF):
// Y zero ?
{.mfi
nop.m 0
fma.s1 f10=f9,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
nop.i 0;;
}
{.mib
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt L(FMOD_Y_ZERO);;
}
// X infinity? Return QNAN indefinite
// p8 = x is infinity, p9 = x is not infinity
{ .mfi
nop.m 999
(p0) fclass.m.unc p8,p9 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
// Only evaluated when x is infinity: p9 = y is NaN, p8 = y is not NaN
{.mfi
nop.m 999
(p8) fclass.m p9,p8=f9,0xc3
nop.i 0;;
}
{.mfi
nop.m 999
(p8) frcpa.s0 f8,p0 = f8,f8
nop.i 0
}
{ .mfi
nop.m 999
// also set Denormal flag if necessary
(p8) fma.s0 f9=f9,f1,f0
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p8) fma.d f8=f8,f1,f0
nop.b 999 ;;
}
// p9 covers the remaining cases (x NaN, or x infinity with y NaN).
{ .mfb
nop.m 999
(p9) frcpa.s0 f8,p7=f8,f9
br.ret.sptk b0 ;;
}
// y is NaN, infinity or zero (x is neither NaN nor infinity here, because
// that case branches to FMOD_X_NAN_INF first).
//   y = Inf -> return x
//   y = NaN -> return y
//   y = 0   -> fall through to FMOD_Y_ZERO
L(FMOD_Y_NAN_INF_ZERO):
// Y INF
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p7) fma.d f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p9) fma.d f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
L(FMOD_Y_ZERO):
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
// The result is built in f11, the original x is copied to f10 (FR_X) and
// control passes to __libm_error_region with tag 121.
{.mfi
nop.m 0
// set Invalid
frcpa f12,p0=f0,f0
nop.i 0
}
// X NAN?
// p9 = x is NaN, p10 = x is not NaN
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
// For non-NaN x: p9 = x is in none of the classes selected by 0xff,
// p10 = x is in one of them.
{ .mfi
nop.m 999
(p10) fclass.nm p9,p10 = f8, 0xff
nop.i 999 ;;
}
// p9: result derived from x itself
{.mfi
nop.m 999
(p9) frcpa f11,p7=f8,f0
nop.i 0;;
}
// p10: result from y/y, with y being zero on this path
{ .mfi
nop.m 999
(p10) frcpa f11,p7 = f9,f9
(p0) mov GR_Parameter_TAG = 121 ;;
}
// save x for the error handler before f8 is overwritten
{ .mfi
nop.m 999
(p0) fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfb
nop.m 999
(p0) fma.d f8=f11,f1,f0
(p0) br.sptk __libm_error_region;;
}
.endp fmod
ASM_SIZE_DIRECTIVE(fmod)
ASM_SIZE_DIRECTIVE(__ieee754_fmod)
// __libm_error_region (fmod)
// Entered by branch (not call) from fmod with FR_X = x, FR_Y = y,
// FR_RESULT = the result and GR_Parameter_TAG = error tag. Builds a 64-byte
// frame, stores the double precision values at
//   sp+16 = x, sp+32 = y, sp+48 = result
// passes their addresses in GR_Parameter_X/Y/RESULT, calls
// __libm_error_support, reloads the result from sp+48 into f8 and returns
// to the caller of fmod.
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// GR_Parameter_Y is new sp+32 here; the post-increment leaves it at sp+48.
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
// The store uses the old value of GR_Parameter_Y (sp+48); the add in the
// same bundle then moves GR_Parameter_Y back to the y slot at sp+32.
{ .mib
stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Recompute the result address after the call instead of relying on the
// output register still holding it.
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

553
sysdeps/ia64/fpu/e_fmodf.S Normal file
View File

@ -0,0 +1,553 @@
.file "fmodf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
// Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//====================================================================
// 2/02/00 Initial version
// 3/02/00 New Algorithm
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//11/28/00 Set FR_Y to f9
//
// API
//====================================================================
// float fmodf(float,float);
//
// Overview of operation
//====================================================================
// fmod(a,b)=a-i*b,
// where i is an integer such that, if b!=0,
// |i|<=|a/b| and |a/b-i|<1
// Algorithm
//====================================================================
// a). if |a|<|b|, return a
// b). get quotient and reciprocal overestimates accurate to
// 33 bits (q2,y2)
// c). if the exponent difference (exponent(a)-exponent(b))
// is less than 32, truncate quotient to integer and
// finish in one iteration
// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
// round quotient estimate to single precision (k=RN(q2)),
// calculate partial remainder (a'=a-k*b),
// get quotient estimate (a'*y2), and repeat from c).
// Special cases
//====================================================================
// b=+/-0: return NaN, call libm_error_support
// a=+/-Inf, a=NaN or b=NaN: return NaN
// Registers used
//====================================================================
// Predicate registers: p6-p11
// General registers: r2, r32 (ar.pfs), r33-r40
// Floating point registers: f6-f15
#include "libm_support.h"
.section .text
// Stacked registers: fmodf allocates 1 input (r32), 4 locals (r33-r36) and
// 4 outputs (r37-r40).
// Locals used by __libm_error_region to preserve state across the call.
// GR_SAVE_SP is defined but not referenced in the visible part of this file.
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
GR_SAVE_SP = r36
// Outputs: addresses of x, y and the result on the stack, plus the error tag.
GR_Parameter_X = r37
GR_Parameter_Y = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
// FP values handed to the error region: saved x, y and the result.
FR_X = f10
FR_Y = f9
FR_RESULT = f8
// float fmodf(float a, float b)
// (also exported as __ieee754_fmodf when _LIBC is defined)
// This part screens the special cases and prepares the values used by the
// reduction loop that follows:
//   f6 = |a|, f7 = |b|, f9 = |b| with the sign of a,
//   f10 = y2 (reciprocal estimate), f13 = q2 (quotient estimate),
//   f15 = 2^32, f12 = 1.25*2^-24, p6 = p10 = 0.
.proc fmodf#
.align 32
.global fmodf#
.align 32
fmodf:
#ifdef _LIBC
.global __ieee754_fmodf
.type __ieee754_fmodf,@function
__ieee754_fmodf:
#endif
// inputs in f8, f9
// result in f8
{ .mfi
alloc r32=ar.pfs,1,4,4,0
// f6=|a|
fmerge.s f6=f0,f8
// exponent field for 2^-34 (0xffdd - 0xffff = -34), used in step (5)
mov r2 = 0x0ffdd
}
{.mfi
nop.m 0
// f7=|b|
fmerge.s f7=f0,f9
nop.i 0;;
}
{ .mfi
setf.exp f11 = r2
// (1) y0
// p6 qualifies the refinement steps (2)-(8) below
frcpa.s1 f10,p6=f6,f7
nop.i 0
}
// eliminate special cases
// Y +-NAN, +-inf, +-0? p7
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0xe7
nop.i 999;;
}
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999
}
// |x| < |y|? Return x p8
{ .mfi
nop.m 999
(p0) fcmp.lt.unc.s1 p8,p0 = f6,f7
nop.i 999 ;;
}
{ .mfi
nop.m 0
// normalize y (if |x|<|y|)
(p8) fma.s0 f9=f9,f1,f0
nop.i 0;;
}
{ .mfi
// exponent field for 2^32 (0x1001f - 0xffff = 32)
mov r2=0x1001f
// (2) q0=a*y0
(p6) fma.s1 f13=f6,f10,f0
nop.i 0
}
{ .mfi
nop.m 0
// (3) e0 = 1 - b * y0
(p6) fnma.s1 f12=f7,f10,f1
nop.i 0;;
}
{.mfi
nop.m 0
// normalize x (if |x|<|y|)
(p8) fma.s.s0 f8=f8,f1,f0
nop.i 0
}
// Special cases are dispatched in this order: x NaN/Inf, then y NaN/Inf/0,
// then the early return when |x|<|y|.
{.bbb
(p9) br.cond.spnt L(FMOD_X_NAN_INF)
(p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
// if |x|<|y|, return
(p8) br.ret.spnt b0;;
}
{.mfi
nop.m 0
// normalize x
fma.s0 f6=f6,f1,f0
nop.i 0
}
{.mfi
nop.m 0
// normalize y
fma.s0 f7=f7,f1,f0
nop.i 0;;
}
{.mfi
// f15=2^32
setf.exp f15=r2
// (4) q1=q0+e0*q0
(p6) fma.s1 f13=f12,f13,f13
nop.i 0
}
{ .mfi
nop.m 0
// (5) e1 = e0 * e0 + 2^-34
(p6) fma.s1 f14=f12,f12,f11
nop.i 0;;
}
{.mlx
nop.m 0
// single precision bit pattern of 1.25*2^-24
movl r2=0x33a00000;;
}
{ .mfi
nop.m 0
// (6) y1 = y0 + e0 * y0
(p6) fma.s1 f10=f12,f10,f10
nop.i 0;;
}
{.mfi
// set f12=1.25*2^{-24}
setf.s f12=r2
// (7) q2=q1+e1*q1
(p6) fma.s1 f13=f13,f14,f13
nop.i 0;;
}
{.mfi
nop.m 0
// f9 = |b| with the sign of a (used for the final correction)
fmerge.s f9=f8,f9
nop.i 0
}
{ .mfi
nop.m 0
// (8) y2 = y1 + e1 * y1
(p6) fma.s1 f10=f14,f10,f10
// set p6=0, p10=0
cmp.ne.and p6,p10=r0,r0;;
}
// Reduction loop.
// In:  f6 = current partial remainder (|a| on entry), f7 = |b|,
//      f10 = y2, f13 = quotient estimate, f15 = 2^32.
// Each pass sets p8 when the quotient estimate is below 2^32 (final pass)
// and p7 otherwise (another partial remainder step is needed).
// On the final pass the result is written to f8 and the routine returns.
.align 32
L(loop24):
{.mfi
nop.m 0
// compare q2, 2^32
fcmp.lt.unc.s1 p8,p7=f13,f15
nop.i 0
}
{.mfi
nop.m 0
// will truncate quotient to integer, if exponent<32 (in advance)
fcvt.fx.trunc.s1 f11=f13
nop.i 0;;
}
{.mfi
nop.m 0
// if exponent>32, round quotient to single precision (perform in advance)
fma.s.s1 f13=f13,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
// set f12=sgn(a)
// (f12 held 1.25*2^-24 until now; that constant is only needed on p7 passes)
(p8) fmerge.s f12=f8,f1
nop.i 0
}
{.mfi
nop.m 0
// normalize truncated quotient
(p8) fcvt.xf f13=f11
nop.i 0;;
}
{ .mfi
nop.m 0
// calculate remainder (assuming f13=RZ(Q))
(p7) fnma.s1 f14=f13,f7,f6
nop.i 0
}
{.mfi
nop.m 0
// also if exponent>32, round quotient to single precision
// and subtract 1 ulp: q=q-q*(1.25*2^{-24})
(p7) fnma.s.s1 f11=f13,f12,f13
nop.i 0;;
}
{.mfi
nop.m 0
// (p8) calculate remainder (82-bit format)
(p8) fnma.s1 f11=f13,f7,f6
nop.i 0
}
{.mfi
nop.m 0
// (p7) calculate remainder (assuming f11=RZ(Q))
(p7) fnma.s1 f6=f11,f7,f6
nop.i 0;;
}
{.mfi
nop.m 0
// Final iteration (p8): is f11 the correct remainder (quotient was not overestimated) ?
// p6 = remainder negative (quotient overestimated), p10 = remainder is correct
(p8) fcmp.lt.unc.s1 p6,p10=f11,f0
nop.i 0;;
}
{.mfi
nop.m 0
// get new quotient estimation: a'*y2
(p7) fma.s1 f13=f14,f10,f0
nop.i 0
}
{.mfb
nop.m 0
// was f14=RZ(Q) ? (then new remainder f14>=0)
// p7 = f14 negative (use the 1-ulp-smaller quotient), p9 = f14 is valid
(p7) fcmp.lt.unc.s1 p7,p9=f14,f0
nop.b 0;;
}
.pred.rel "mutex",p6,p10
{.mfb
nop.m 0
// add b to estimated remainder (to cover the case when the quotient was overestimated)
// also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
(p6) fma.s.s0 f8=f11,f12,f9
nop.b 0
}
{.mfb
nop.m 0
// calculate remainder (single precision)
// set correct sign of result before returning
(p10) fma.s.s0 f8=f11,f12,f0
(p8) br.ret.sptk b0;;
}
{.mfi
nop.m 0
// if f13!=RZ(Q), get alternative quotient estimation: a''*y2
(p7) fma.s1 f13=f6,f10,f0
nop.i 0
}
{.mfb
nop.m 0
// if f14 was RZ(Q), set remainder to f14
(p9) mov f6=f14
br.cond.sptk L(loop24);;
}
// NOTE(review): the loop branch above is unconditional and no label
// precedes this bundle, so it looks unreachable - confirm before removing.
{ .mmb
nop.m 0
nop.m 0
br.ret.sptk b0;;
}
L(FMOD_X_NAN_INF):
// Y zero ?
{.mfi
nop.m 0
fma.s1 f10=f9,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
nop.i 0;;
}
{.mib
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt L(FMOD_Y_ZERO);;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
(p0) fclass.m.unc p8,p9 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
{.mfi
nop.m 999
(p8) fclass.m p9,p8=f9,0xc3
nop.i 0;;
}
{.mfi
nop.m 999
(p8) frcpa.s0 f8,p0 = f8,f8
nop.i 0
}
{ .mfi
nop.m 999
// also set Denormal flag if necessary
(p8) fma.s0 f9=f9,f1,f0
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p8) fma.s f8=f8,f1,f0
nop.b 999 ;;
}
{ .mfb
nop.m 999
(p9) frcpa.s0 f8,p7=f8,f9
br.ret.sptk b0 ;;
}
L(FMOD_Y_NAN_INF_ZERO):
// Y INF
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p7) fma.s f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p9) fma.s f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
L(FMOD_Y_ZERO):
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
{.mfi
nop.m 0
// set Invalid
frcpa f12,p0=f0,f0
nop.i 999
}
// X NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p10) fclass.nm p9,p10 = f8, 0xff
nop.i 999 ;;
}
{.mfi
nop.m 999
(p9) frcpa f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
(p10) frcpa f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
(p0) fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
(p0) fma.s f8=f11,f1,f0
nop.i 999;;
}
L(EXP_ERROR_RETURN):
{ .mib
nop.m 0
(p0) mov GR_Parameter_TAG=122
(p0) br.sptk __libm_error_region;;
}
.endp fmodf
ASM_SIZE_DIRECTIVE(fmodf)
ASM_SIZE_DIRECTIVE(__ieee754_fmodf)
// __libm_error_region (single-precision copy)
// Error exit reached by a plain branch (`br.sptk __libm_error_region`) from the
// code above, so b0 still holds the original caller's return address.
// Builds a 64-byte stack frame, spills FR_X, FR_Y and FR_RESULT into it as
// single-precision values, calls __libm_error_support, reloads f8 from the
// result slot and returns.
// Frame layout after `add sp=-64,sp`:
//   [sp+16] = FR_X, [sp+32] = FR_Y, [sp+48] = FR_RESULT
// NOTE(review): the GR_*/FR_* aliases used here are defined earlier in this
// file, outside the excerpt under review; presumably GR_Parameter_* are the
// output registers carrying the slot addresses and the tag -- confirm.
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Address of the Parameter 2 slot (old sp-32 == new sp+32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new 64-byte stack frame
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
stfs [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; pointer post-increments to sp+48
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
nop.b 0
}
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y // Step back to the Parameter 2 slot (sp+32)
br.call.sptk b0=__libm_error_support#;; // Call error handling function
}
{ .mmi
nop.m 0
nop.m 0
// Re-derive the result slot address from sp instead of relying on the
// register value surviving the call.
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

577
sysdeps/ia64/fpu/e_fmodl.S Normal file
View File

@ -0,0 +1,577 @@
.file "fmodl.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
// Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//====================================================================
// 2/02/00 Initial version
// 3/02/00 New Algorithm
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//11/28/00 Set FR_Y to f9
//
// API
//====================================================================
// long double fmodl(long double,long double);
//
// Overview of operation
//====================================================================
// fmod(a,b)=a-i*b,
// where i is an integer such that, if b!=0,
// |i|<=|a/b| and |a/b-i|<1
//
// Algorithm
//====================================================================
// a). if |a|<|b|, return a
// b). get quotient and reciprocal overestimates accurate to
// 33 bits (q2,y2)
// c). if the exponent difference (exponent(a)-exponent(b))
// is less than 32, truncate quotient to integer and
// finish in one iteration
// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
// round quotient estimate to single precision (k=RN(q2)),
// calculate partial remainder (a'=a-k*b),
// get quotient estimate (a'*y2), and repeat from c).
//
// Registers used
//====================================================================
// Predicate registers: p6-p11
// General registers: r2,r29,r32 (ar.pfs), r33-r40
// Floating point registers: f6-f15, f32-f33
#include "libm_support.h"
.section .text
// Register aliases shared by fmodl and its __libm_error_region.
// fmodl executes `alloc r32=ar.pfs,1,4,4,0`, so the stacked frame is r32-r40
// and r37-r40 are its four output registers.
GR_SAVE_B0 = r33 // b0, preserved around the __libm_error_support call
GR_SAVE_PFS = r34 // ar.pfs, preserved around the __libm_error_support call
GR_SAVE_GP = r35 // gp, preserved around the __libm_error_support call
GR_SAVE_SP = r36 // NOTE(review): not referenced by fmodl or its error region
GR_Parameter_X = r37 // out0: address of the stored x
GR_Parameter_Y = r38 // out1: address of the stored y
GR_Parameter_RESULT = r39 // out2: address of the stored result
GR_Parameter_TAG = r40 // out3: error tag (120 on the y == 0 path)
FR_X = f10 // copy of x made on the y == 0 path (f8 is overwritten there)
FR_Y = f9 // y as passed in
FR_RESULT = f8 // value returned to the caller
// long double fmodl(long double x, long double y)
// Entry points: fmodl, plus __ieee754_fmodl when _LIBC is defined.
// In:  f8 = x (called a below), f9 = y (called b below)
// Out: f8 = result
// Flow:
//   * |x| < |y|                  -> return x (normalized) straight away
//   * x is NaN/Inf/unsupported   -> L(FMOD_X_NAN_INF)
//   * y is NaN/Inf/zero/unsupp.  -> L(FMOD_Y_NAN_INF_ZERO)
//   * y == 0                     -> L(FMOD_Y_ZERO), which leaves through
//                                   __libm_error_region with tag 120
//   * otherwise steps (1)-(8) refine frcpa into q2/y2 and L(loop64)
//     consumes the quotient until it is below 2^32.
.proc fmodl#
.align 32
.global fmodl#
.align 32
fmodl:
#ifdef _LIBC
.global __ieee754_fmodl
.type __ieee754_fmodl,@function
__ieee754_fmodl:
#endif
// inputs in f8, f9
// result in f8
{ .mfi
alloc r32=ar.pfs,1,4,4,0
// f6=|a|
fmerge.s f6=f0,f8
// r2 = biased exponent of 2^-34 (0xffff - 34), consumed by setf.exp below
mov r2 = 0x0ffdd
}
{.mfi
// r29 = significand of y (tested for zero below and in L(FMOD_X_NAN_INF))
getf.sig r29=f9
// f7=|b|
fmerge.s f7=f0,f9
nop.i 0;;
}
{ .mfi
// f11 = 2^-34 (added into e1 in step (5))
setf.exp f11 = r2
// (1) y0
frcpa.s1 f10,p6=f6,f7
nop.i 0;;
}
// eliminate special cases
{.mmi
nop.m 0
nop.m 0
// y pseudo-zero ?
cmp.eq p7,p10=r29,r0;;
}
// Y +-NAN, +-inf, +-0? p7
// fclass mask 0xe7 = qnan | snan | inf | zero, either sign
{ .mfi
nop.m 999
(p10) fclass.m p7,p10 = f9, 0xe7
nop.i 999;;
}
// Mask 0xe3 used for X below:
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p11 = f8, 0xe3
nop.i 999
}
// |x| < |y|? Return x p8
{ .mfi
nop.m 999
(p10) fcmp.lt.unc.s1 p8,p0 = f6,f7
nop.i 999 ;;
}
{ .mfi
// r2 = biased exponent of 2^32 (0xffff + 32), consumed by setf.exp f15 below
mov r2=0x1001f
// (2) q0=a*y0
(p6) fma.s1 f13=f6,f10,f0
nop.i 0
} { .mfi
nop.m 0
// (3) e0 = 1 - b * y0
(p6) fnma.s1 f12=f7,f10,f1
nop.i 0;;
}
// Y matches none of the classes in mask 0xff? Then also treat it as special (p7)
{ .mfi
nop.m 999
// pseudo-NaN ?
(p10) fclass.nm p7,p0 = f9, 0xff
nop.i 999
}
// X matches none of the classes in mask 0xff? Then also treat it as special (p9)
{ .mfi
nop.m 999
(p11) fclass.nm p9,p0 = f8, 0xff
nop.i 999;;
}
{ .mfi
nop.m 0
// y denormal ? set D flag (if |x|<|y|)
// (f10 is only a dummy destination here; this path returns immediately)
(p8) fnma.s0 f10=f9,f1,f9
nop.i 0;;
}
{.mfi
nop.m 0
// normalize x (if |x|<|y|)
(p8) fma.s0 f8=f8,f1,f0
nop.i 0
}
// Three-way dispatch: the first taken branch wins, so the X special case has
// priority over the Y special case, which has priority over the early return.
{.bbb
(p9) br.cond.spnt L(FMOD_X_NAN_INF)
(p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
// if |x|<|y|, return
(p8) br.ret.spnt b0;;
}
{.mfi
nop.m 0
// x denormal ? set D flag
// (f32 is a dummy destination; only the status flag matters)
fnma.s0 f32=f6,f1,f6
nop.i 0
}
{.mfi
nop.m 0
// y denormal ? set D flag
// (f33 is a dummy destination; only the status flag matters)
fnma.s0 f33=f7,f1,f7
nop.i 0;;
}
{.mfi
// f15=2^32
setf.exp f15=r2
// (4) q1=q0+e0*q0
(p6) fma.s1 f13=f12,f13,f13
nop.i 0
}
{ .mfi
nop.m 0
// (5) e1 = e0 * e0 + 2^-34
(p6) fma.s1 f14=f12,f12,f11
nop.i 0;;
}
{.mlx
nop.m 0
// r2 = single-precision bit pattern of 1.25*2^-24
movl r2=0x33a00000;;
}
{ .mfi
nop.m 0
// (6) y1 = y0 + e0 * y0
(p6) fma.s1 f10=f12,f10,f10
nop.i 0;;
}
{.mfi
// set f12=1.25*2^{-24}
setf.s f12=r2
// (7) q2=q1+e1*q1
(p6) fma.s1 f13=f13,f14,f13
nop.i 0;;
}
{.mfi
nop.m 0
// f9 = |b|*sgn(a): sign taken from f8, magnitude from f9
fmerge.s f9=f8,f9
nop.i 0
}
{ .mfi
nop.m 0
// (8) y2 = y1 + e1 * y1
(p6) fma.s1 f10=f14,f10,f10
// set p6=0, p10=0
cmp.ne.and p6,p10=r0,r0;;
}
// Main reduction loop. State on entry to each pass:
//   f6  = current partial remainder (initially |a|)
//   f7  = |b|, f10 = y2 (reciprocal estimate), f13 = quotient estimate
//   f15 = 2^32
//   f12 = 1.25*2^-24 (the final pass overwrites it with sgn(a))
//   f9  = |b|*sgn(a)
// p8 marks the final pass (quotient < 2^32); p7 marks a pass that must repeat.
.align 32
L(loop64):
{.mfi
nop.m 0
// compare q2, 2^32
fcmp.lt.unc.s1 p8,p7=f13,f15
nop.i 0
}
{.mfi
nop.m 0
// will truncate quotient to integer, if exponent<32 (in advance)
fcvt.fx.trunc.s1 f11=f13
nop.i 0;;
}
{.mfi
nop.m 0
// if exponent>32, round quotient to single precision (perform in advance)
fma.s.s1 f13=f13,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
// set f12=sgn(a)
(p8) fmerge.s f12=f8,f1
nop.i 0
}
{.mfi
nop.m 0
// normalize truncated quotient
(p8) fcvt.xf f13=f11
nop.i 0;;
}
{ .mfi
nop.m 0
// calculate remainder (assuming f13=RZ(Q))
(p7) fnma.s1 f14=f13,f7,f6
nop.i 0
}
{.mfi
nop.m 0
// also if exponent>32, round quotient to single precision
// and subtract 1 ulp: q=q-q*(1.25*2^{-24})
(p7) fnma.s.s1 f11=f13,f12,f13
nop.i 0;;
}
{.mfi
nop.m 0
// (p8) calculate remainder (82-bit format)
(p8) fnma.s1 f11=f13,f7,f6
nop.i 0
}
{.mfi
nop.m 0
// (p7) calculate remainder (assuming f11=RZ(Q))
(p7) fnma.s1 f6=f11,f7,f6
nop.i 0;;
}
{.mfi
nop.m 0
// Final iteration (p8): is f6 the correct remainder (quotient was not overestimated) ?
// p6 = remainder f11 is negative (needs +|b|), p10 = remainder is usable as is
(p8) fcmp.lt.unc.s1 p6,p10=f11,f0
nop.i 0;;
}
{.mfi
nop.m 0
// get new quotient estimation: a'*y2
(p7) fma.s1 f13=f14,f10,f0
nop.i 0
}
{.mfb
nop.m 0
// was f13=RZ(Q) ? (then new remainder f14>=0)
// p7 is redefined here: p7 = f14 < 0 (use f6 instead), p9 = f14 >= 0 (keep f14)
(p7) fcmp.lt.unc.s1 p7,p9=f14,f0
nop.b 0;;
}
.pred.rel "mutex",p6,p10
{.mfb
nop.m 0
// add b to estimated remainder (to cover the case when the quotient was overestimated)
// also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
(p6) fma.s0 f8=f11,f12,f9
nop.b 0
}
{.mfb
nop.m 0
// set correct sign of result before returning: f12=sgn(a)
(p10) fma.s0 f8=f11,f12,f0
(p8) br.ret.sptk b0;;
}
{.mfi
nop.m 0
// if f13!=RZ(Q), get alternative quotient estimation: a''*y2
(p7) fma.s1 f13=f6,f10,f0
nop.i 0
}
{.mfb
nop.m 0
// if f14 was RZ(Q), set remainder to f14
(p9) mov f6=f14
br.cond.sptk L(loop64);;
}
// X is NaN, infinity or an unsupported encoding (p9 at the dispatch above).
L(FMOD_X_NAN_INF):
// Y zero ?
{.mfi
nop.m 0
// f10 = y*1, so the comparison below sees a normalized copy of y
fma.s1 f10=f9,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
nop.i 0;;
}
{.mib
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt L(FMOD_Y_ZERO);;
}
// X infinity? Return QNAN indefinite
// p8 = x is +-inf (mask 0x23), p9 = x is not
{ .mfi
// set p7 to 0
cmp.ne p7,p0=r0,r0
(p0) fclass.m.unc p8,p9 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
// Only evaluated when x is infinite: p9 = y is a NaN (mask 0xc3), p8 = y is not
{.mfi
nop.m 999
(p8) fclass.m p9,p8=f9,0xc3
nop.i 0;;
}
// Y not pseudo-zero ? (r29 holds significand)
{.mii
nop.m 999
(p8) cmp.ne p7,p0=r29,r0
nop.i 0;;
}
{.mfi
nop.m 999
// x infinite, y not NaN: f8 = f8/f8 (inf/inf), per the "QNAN indefinite" note above
(p8) frcpa.s0 f8,p0 = f8,f8
nop.i 0
}
{ .mfi
nop.m 999
// also set Denormal flag if necessary
(p7) fnma.s0 f9=f9,f1,f9
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p8) fma.s0 f8=f8,f1,f0
nop.b 999 ;;
}
{ .mfb
nop.m 999
// remaining case (p9): x is not infinite, or y is a NaN; result comes from frcpa(x, y)
(p9) frcpa.s0 f8,p7=f8,f9
br.ret.sptk b0 ;;
}
// Y is NaN, infinity, zero or an unsupported encoding (p7 at the dispatch
// above), and X was not special.
L(FMOD_Y_NAN_INF_ZERO):
// Y INF
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
// y infinite: return x*1
(p7) fma f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f9, 0xc3
nop.i 999 ;;
}
{ .mfi
nop.m 999
// y matches no class in mask 0xff: handle it like a NaN
(p10) fclass.nm p9,p0 = f9, 0xff
nop.i 999 ;;
}
{ .mfb
nop.m 999
// y NaN: return y*1
(p9) fma f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
// Not Inf and not NaN: fall through to the y == 0 handling.
L(FMOD_Y_ZERO):
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
{.mfi
nop.m 0
// set Invalid
frcpa f12,p0=f0,f0
nop.i 0
}
// X NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
nop.m 999
// x not a NaN: p9 = x matches no class in mask 0xff, p10 = x is an ordinary value
(p10) fclass.nm p9,p10 = f8, 0xff
nop.i 999 ;;
}
{.mfi
nop.m 999
// x NaN or unsupported: derive the result from x
(p9) frcpa f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
// otherwise: result is y/y with y == 0
(p10) frcpa f11,p7 = f9,f9
// error tag handed to __libm_error_support
(p0) mov GR_Parameter_TAG = 120 ;;
}
{ .mfi
nop.m 999
// FR_X (f10) = x, saved because f8 is overwritten with the result next
(p0) fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfb
nop.m 999
// FR_RESULT (f8) = f11, then hand over to the error region
(p0) fma f8=f11,f1,f0
(p0) br.sptk __libm_error_region;;
}
.endp fmodl
ASM_SIZE_DIRECTIVE(fmodl)
ASM_SIZE_DIRECTIVE(__ieee754_fmodl)
// __libm_error_region (double-extended copy, used by fmodl)
// Reached by `br.sptk __libm_error_region` from L(FMOD_Y_ZERO) with
// GR_Parameter_TAG already set, so b0 still holds fmodl's return address.
// Builds a 64-byte stack frame, spills FR_X, FR_Y and FR_RESULT into it with
// stfe, calls __libm_error_support with the slot addresses and the tag in the
// output registers, reloads f8 from the result slot and returns.
// Frame layout after `add sp=-64,sp`:
//   [sp+16] = FR_X, [sp+32] = FR_Y, [sp+48] = FR_RESULT
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Address of the Parameter 2 slot (old sp-32 == new sp+32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new 64-byte stack frame
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; pointer post-increments to sp+48
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
nop.b 0
}
{ .mib
stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y // Step back to the Parameter 2 slot (sp+32)
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
nop.m 0
nop.m 0
// Re-derive the result slot address: GR_Parameter_RESULT was overwritten
// by the call (see the 8/15/00 history entry).
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

438
sysdeps/ia64/fpu/e_hypot.S Normal file
View File

@ -0,0 +1,438 @@
.file "hypot.asm"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
// Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// *********************************************************************
//
// History:
// 2/02/00 hand-optimized
// 4/04/00 Unwind support added
// 6/20/00 new version
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//
// *********************************************************************
// Function: hypot(x,y) = sqrt(x^2 + y^2) for double precision values
// x and y
// Also provides cabs functionality.
//
// *********************************************************************
//
// Resources Used:
//
// Floating-Point Registers: f8 (Input and Return Value)
// f9 (Input)
// f6 -f15, f32-f34
//
// General Purpose Registers:
// r2,r3,r29 (Scratch)
// r32-r35 (Locals)
// r36-r39 (Used to pass arguments to error handling routine)
//
// Predicate Registers: p6 - p10
//
// *********************************************************************
//
// IEEE Special Conditions:
//
// All faults and exceptions should be raised correctly.
// Overflow can occur.
// hypot(Infinity and anything) = +Infinity
// hypot(QNaN and anything) = QNaN
// hypot(SNaN and anything ) = QNaN
//
// *********************************************************************
//
// Implementation:
// x2 = x * x in double-extended
// y2 = y * y in double-extended
// temp = x2 + y2 in double-extended
// sqrt(temp) rounded to double
//
// *********************************************************************
#include "libm_support.h"
// Register aliases shared by hypot and its __libm_error_region.
// hypot executes `alloc r32= ar.pfs,0,4,4,0`: r32-r35 are locals and
// r36-r39 are the four output registers.
GR_SAVE_PFS = r33 // ar.pfs, preserved around the __libm_error_support call
GR_SAVE_B0 = r34 // b0, preserved around the __libm_error_support call
GR_SAVE_GP = r35 // gp, preserved around the __libm_error_support call
GR_Parameter_X = r36 // out0: address of the stored x
GR_Parameter_Y = r37 // out1: address of the stored y
GR_Parameter_RESULT = r38 // out2: address of the stored result
GR_Parameter_TAG = r39 // out3: error tag (46 on the overflow path)
FR_X = f32 // copy of x taken at entry (fma.d.s0 f32=f8,f1,f0)
FR_Y = f33 // copy of y taken at entry (fma.d.s0 f33=f9,f1,f0)
FR_RESULT = f8 // value returned to the caller
.section .text
// double hypot(double x, double y)
// Entry points: hypot; cabs when _LIBC is not defined; __hypot and
// __ieee754_hypot when _LIBC is defined.
// In:  f8 = x, f9 = y
// Out: f8 = result
// Flow:
//   * an infinite operand, or a zero operand paired with a non-NaN one,
//     returns |x| or |y| directly;
//   * a = x^2 + y^2 that classifies as special (mask 0x1E7) is returned as is;
//   * otherwise sqrt(a) is built from frsqrta plus a polynomial correction
//     in d = 0.5 - S0*H0;
//   * if the exponent of a is not safely below the threshold, the result is
//     recomputed under status field s2 to detect overflow, and on overflow
//     control falls through into __libm_error_region with tag 46.
#ifndef _LIBC
// cabs is an empty procedure whose label sits directly in front of hypot.
// NOTE(review): `.align 64` comes after the cabs label, so cabs reaches hypot
// only through whatever padding the assembler inserts -- confirm that padding
// is executable no-ops.
.proc cabs#
.global cabs#
cabs:
.endp cabs
#endif
.proc hypot#
.global hypot#
.align 64
hypot:
#ifdef _LIBC
.global __hypot
__hypot:
.global __ieee754_hypot
__ieee754_hypot:
#endif
{.mfi
alloc r32= ar.pfs,0,4,4,0
// Compute x*x
fma.s1 f10=f8,f8,f0
// r2=bias-1
mov r2=0xfffe
}
{.mfi
// 63/8
// (upper 16 bits of the single-precision pattern; shifted left by 16 below)
mov r3=0x40fc //0000
// y*y
fma.s1 f11=f9,f9,f0
// r29=429/16
// (upper 20 bits of the single-precision pattern; shifted left by 12 below)
mov r29=0x41d68;; //000
}
{ .mfi
nop.m 0
// Check if x is an Inf - if so return Inf even
// if y is a NaN (C9X)
// p7 = x is +-inf, p6 = x is not
fclass.m.unc p7, p6 = f8, 0x023
shl r3=r3,16
}
{.mfi
nop.m 0
// if possible overflow, copy f8 to f32
// set Denormal, if necessary
// (p8)
fma.d.s0 f32=f8,f1,f0
nop.i 0;;
}
{ .mfi
nop.m 0
// Check if y is an Inf - if so return Inf even
// if x is a NaN (C9X)
// p8 = y is +-inf, p9 = y is not
fclass.m.unc p8, p9 = f9, 0x023
shl r29=r29,12
}
{ .mfb
// f7=0.5
setf.exp f7=r2
// For x=inf, multiply y by 1 to raise invalid on y an SNaN
// (p7) fma.s0 f9=f9,f1,f0
// copy f9 to f33; set Denormal, if necessary
fma.d.s0 f33=f9,f1,f0
nop.b 0;;
}
{.mfb
// f13=63/8
setf.s f13=r3
// is y Zero ?
// (only when x is not inf; p6 stays set only if y is zero)
(p6) fclass.m p6,p0=f9,0x7
nop.b 0
}
{.mlx
nop.m 0
// r2 = single-precision bit pattern of 35/8
movl r2=0x408c0000;;
}
{.mfi
// f34=429/16
setf.s f34=r29
// is x Zero ?
// (only when y is not inf; p9 stays set only if x is zero)
(p9) fclass.m p9,p0=f8,0x7
// 231/16
mov r3=0x4167;; //0000
}
{.mfi
nop.m 0
// a=x2+y2
fma.s1 f12=f10,f1,f11
nop.i 0;;
}
{.mfi
nop.m 0
// y not NaN ?
// (x is zero here: p8 selects the "return |y|" exit)
(p9) fclass.m p8,p0=f9,0x3f
shl r3=r3,16
}
{.mfi
nop.m 0
// f6=2
fma.s1 f6=f1,f1,f1
nop.i 0;;
}
{.mfi
nop.m 0
// x not NaN ?
// (y is zero here: p7 selects the "return |x|" exit)
(p6) fclass.m p7,p0=f8,0x3f
nop.i 0;;
}
{.mfi
// f9=35/8
// (y itself is no longer needed; its copy lives in f33)
setf.s f9=r2
nop.f 0
// 2*emax-2
// r2 = bias + 2*emax - 2 for double: exponent threshold tested against r3 below
mov r2=0x107fb;;
}
{.mfb
nop.m 0
// if f8=Infinity or f9=Zero, return |f8|
(p7) fmerge.s f8=f0,f32
(p7) br.ret.spnt b0
}
{.mfb
nop.m 0
// if f9=Infinity or f8=Zero, return |f9|
(p8) fmerge.s f8=f0,f33
(p8) br.ret.spnt b0;;
}
{.mfi
// f10 =231/16
setf.s f10=r3
// z0=frsqrta(a)
frsqrta.s1 f8,p6=f12
nop.i 0;;
}
{ .mfi
nop.m 0
// Identify Natvals, Infs, NaNs, and Zeros
// and return result
fclass.m.unc p7, p0 = f12, 0x1E7
nop.i 0;;
}
{.mfb
// get exponent of x^2+y^2
getf.exp r3=f12
// if special case, set f8
(p7) mov f8=f12
(p7) br.ret.spnt b0;;
}
{.mfi
nop.m 0
// S0=a*z0
(p6) fma.s1 f14=f12,f8,f0
nop.i 0
}
{.mfi
nop.m 0
// H0=0.5*z0
(p6) fma.s1 f15=f8,f7,f0
nop.i 0;;
}
{.mfi
nop.m 0
// f6=5/2
fma.s1 f6=f7,f1,f6
nop.i 0
}
{.mfi
nop.m 0
// f11=3/2
fma.s1 f11=f7,f1,f1
nop.i 0;;
}
{.mfi
nop.m 0
// d=0.5-S0*H0
(p6) fnma.s1 f7=f14,f15,f7
nop.i 0;;
}
{.mfi
nop.m 0
// P67=231/16+429/16*d
(p6) fma.s1 f10=f34,f7,f10
nop.i 0
}
{.mfi
nop.m 0
// P45=63/8*d+35/8
(p6) fma.s1 f9=f13,f7,f9
nop.i 0;;
}
{.mfi
nop.m 0
// P23=5/2*d+3/2
(p6) fma.s1 f11=f6,f7,f11
nop.i 0
}
{.mfi
nop.m 0
// d2=d*d
(p6) fma.s1 f13=f7,f7,f0
nop.i 0;;
}
{.mfi
nop.m 0
// P47=d2*P67+P45
(p6) fma.s1 f10=f10,f13,f9
nop.i 0
}
{.mfi
nop.m 0
// P13=d*P23+1
(p6) fma.s1 f11=f11,f7,f1
nop.i 0;;
}
{.mfi
nop.m 0
// d3=d2*d
(p6) fma.s1 f13=f13,f7,f0
nop.i 0;;
}
{.mfi
nop.m 0
// T0=d*S0
(p6) fma.s1 f15=f7,f14,f0
nop.i 0
}
{.mfi
// Is x^2 + y^2 well less than the overflow
// threshold?
// p7 = exponent below threshold (safe), p8 = needs the overflow check
(p6) cmp.lt.unc p7, p8 = r3,r2
// P=P13+d3*P47
(p6) fma.s1 f10=f13,f10,f11
nop.i 0;;
}
{.mfb
nop.m 0
// S=P*T0+S0
fma.d.s0 f8=f10,f15,f14
// No overflow in this case
(p7) br.ret.sptk b0;;
}
{ .mfi
nop.m 0
(p8) fsetc.s2 0x7F,0x42
// Possible overflow path, must detect by
// Setting widest range exponent with prevailing
// rounding mode.
nop.i 0 ;;
}
{ .mfi
// bias+0x400 (bias+EMAX+1)
(p8) mov r2=0x103ff
// S=P*T0+S0
// (recomputed under s2 into f12; f8 already holds the value to return)
(p8) fma.d.s2 f12=f10,f15,f14
nop.i 0 ;;
}
{ .mfi
// f11 = 2^(EMAX+1)
(p8) setf.exp f11 = r2
(p8) fsetc.s2 0x7F,0x40
// Restore Original Mode in S2
nop.i 0 ;;
}
{ .mfi
nop.m 0
// p9 = wide-range result is below 2^(EMAX+1): no overflow
(p8) fcmp.lt.unc.s1 p9, p10 = f12, f11
nop.i 0 ;;
}
{ .mib
nop.m 0
mov GR_Parameter_TAG = 46
// No overflow
(p9) br.ret.sptk b0;;
}
// Overflow: execution falls through into __libm_error_region, which follows
// immediately, with GR_Parameter_TAG = 46.
.endp hypot
ASM_SIZE_DIRECTIVE(hypot)
// __libm_error_region (double-precision copy, used by hypot)
// Entered by falling through from the end of hypot on overflow, with
// GR_Parameter_TAG already set, so b0 still holds hypot's return address.
// Builds a 64-byte stack frame, spills FR_X, FR_Y and FR_RESULT into it with
// stfd, calls __libm_error_support with the slot addresses and the tag in the
// output registers, reloads f8 from the result slot and returns.
// Frame layout after `add sp=-64,sp`:
//   [sp+16] = FR_X, [sp+32] = FR_Y, [sp+48] = FR_RESULT
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Address of the Parameter 2 slot (old sp-32 == new sp+32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new 64-byte stack frame
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; pointer post-increments to sp+48
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
nop.b 0
}
{ .mib
stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y // Step back to the Parameter 2 slot (sp+32)
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
nop.m 0
nop.m 0
// Re-derive the result slot address: GR_Parameter_RESULT was overwritten
// by the call (see the 8/15/00 history entry).
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

394
sysdeps/ia64/fpu/e_hypotf.S Normal file
View File

@ -0,0 +1,394 @@
.file "hypotf.asm"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
// Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// *********************************************************************
//
// History:
// 2/02/00 hand-optimized
// 4/04/00 Unwind support added
// 6/26/00 new version
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//
// *********************************************************************
// Function: hypotf(x,y) = sqrt(x^2 + y^2) for single precision values
// x and y
// Also provides cabsf functionality.
//
// *********************************************************************
//
// Resources Used:
//
// Floating-Point Registers: f8 (Input and Return Value)
// f9 (Input)
// f6 -f15
//
// General Purpose Registers:
// r2-r3 (Scratch)
// r32-r35 (Locals)
// r36-r39 (Used to pass arguments to error handling routine)
//
// Predicate Registers: p6 - p10
//
// *********************************************************************
//
// IEEE Special Conditions:
//
// All faults and exceptions should be raised correctly.
// Overflow can occur.
// hypotf(Infinity and anything) = +Infinity
// hypotf(QNaN and anything) = QNaN
// hypotf(SNaN and anything ) = QNaN
//
// *********************************************************************
//
// Implementation:
// x2 = x * x in double-extended
// y2 = y * y in double-extended
// temp = x2 + y2 in double-extended
// sqrt(temp) rounded to single precision
//
// *********************************************************************
#include "libm_support.h"
// Register aliases shared by hypotf and its __libm_error_region.
// hypotf executes `alloc r32= ar.pfs,0,4,4,0`: r32-r35 are locals and
// r36-r39 are the four output registers.
GR_SAVE_PFS = r33 // ar.pfs, preserved around the __libm_error_support call
GR_SAVE_B0 = r34 // b0, preserved around the __libm_error_support call
GR_SAVE_GP = r35 // gp, preserved around the __libm_error_support call
GR_Parameter_X = r36 // out0: address of the stored x
GR_Parameter_Y = r37 // out1: address of the stored y
GR_Parameter_RESULT = r38 // out2: address of the stored result
GR_Parameter_TAG = r39 // out3: error tag (47 on the overflow path)
FR_X = f14 // copy of x taken at entry (fma.s.s0 f14=f8,f1,f0)
FR_Y = f15 // copy of y taken at entry (fma.s.s0 f15=f9,f1,f0)
FR_RESULT = f8 // value returned to the caller
.section .text
// float hypotf(float x, float y)
// Entry points: hypotf; cabsf when _LIBC is not defined; __hypotf and
// __ieee754_hypotf when _LIBC is defined.
// In:  f8 = x, f9 = y
// Out: f8 = result
// Flow:
//   * an infinite operand, or a zero operand paired with a non-NaN one,
//     returns |x| or |y| directly;
//   * a = x^2 + y^2 that classifies as special (mask 0x1E7) is returned as is;
//   * otherwise sqrt(a) = P*S0, with S0 = a*frsqrta(a) and P a cubic in
//     d = 0.5 - S0*H0;
//   * if the exponent of a is not safely below the threshold, the result is
//     recomputed under status field s2 to detect overflow, and on overflow
//     control falls through into __libm_error_region with tag 47.
#ifndef _LIBC
// cabsf is an empty procedure whose label sits directly in front of hypotf.
// NOTE(review): `.align 64` comes after the cabsf label, so cabsf reaches
// hypotf only through whatever padding the assembler inserts -- confirm that
// padding is executable no-ops.
.proc cabsf#
.global cabsf#
cabsf:
.endp cabsf
#endif
.proc hypotf#
.global hypotf#
.align 64
hypotf:
#ifdef _LIBC
.global __hypotf
__hypotf:
.global __ieee754_hypotf
__ieee754_hypotf:
#endif
{.mfi
alloc r32= ar.pfs,0,4,4,0
// Compute x*x
fma.s1 f10=f8,f8,f0
// r2=bias-1
mov r2=0xfffe
}
{.mfi
nop.m 0
// y*y
fma.s1 f11=f9,f9,f0
nop.i 0;;
}
{ .mfi
nop.m 0
// Check if x is an Inf - if so return Inf even
// if y is a NaN (C9X)
// p7 = x is +-inf, p6 = x is not
fclass.m.unc p7, p6 = f8, 0x023
nop.i 0
}
{.mfi
nop.m 0
// if possible overflow, copy f8 to f14
// set Denormal, if necessary
// (p8)
fma.s.s0 f14=f8,f1,f0
nop.i 0;;
}
{ .mfi
nop.m 0
// Check if y is an Inf - if so return Inf even
// if x is a NaN (C9X)
// p8 = y is +-inf, p9 = y is not
fclass.m.unc p8, p9 = f9, 0x023
nop.i 0
}
{ .mfi
nop.m 0
// For x=inf, multiply y by 1 to raise invalid on y an SNaN
// (p7) fma.s0 f9=f9,f1,f0
// copy f9 to f15; set Denormal, if necessary
fma.s.s0 f15=f9,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
// is y Zero ?
// (only when x is not inf; p6 stays set only if y is zero)
(p6) fclass.m p6,p0=f9,0x7
nop.i 0;;
}
{.mfi
nop.m 0
// is x Zero ?
// (only when y is not inf; p9 stays set only if x is zero)
(p9) fclass.m p9,p0=f8,0x7
nop.i 0;;
}
{.mfi
// f7=0.5
setf.exp f7=r2
// a=x2+y2
fma.s1 f12=f10,f1,f11
nop.i 0;;
}
{.mfi
nop.m 0
// x not NaN ?
// (y is zero here: p7 selects the "return |x|" exit)
(p6) fclass.m p7,p0=f8,0x3f
nop.i 0
}
{.mfi
// 2*emax-2
// r2 = bias + 2*emax - 2 for single: exponent threshold tested against r3 below
mov r2=0x100fb
// f6=2
fma.s1 f6=f1,f1,f1
nop.i 0;;
}
{.mfi
nop.m 0
// y not NaN ?
// (x is zero here: p8 selects the "return |y|" exit)
(p9) fclass.m p8,p0=f9,0x3f
nop.i 0;;
}
{.mfb
nop.m 0
// if f8=Infinity or f9=Zero, return |f8|
(p7) fmerge.s f8=f0,f14
(p7) br.ret.spnt b0
}
{.mfb
nop.m 0
// if f9=Infinity or f8=Zero, return |f9|
(p8) fmerge.s f8=f0,f15
(p8) br.ret.spnt b0;;
}
{ .mfi
nop.m 0
// Identify Natvals, Infs, NaNs, and Zeros
// and return result
fclass.m.unc p7, p0 = f12, 0x1E7
nop.i 0
}
{.mfi
nop.m 0
// z0=frsqrta(a)
frsqrta.s1 f8,p6=f12
nop.i 0;;
}
{.mfb
// get exponent of x^2+y^2
getf.exp r3=f12
// if special case, set f8
(p7) mov f8=f12
(p7) br.ret.spnt b0;;
}
{.mfi
nop.m 0
// S0=a*z0
// (f12 is reused: it holds S0 from here on)
(p6) fma.s1 f12=f12,f8,f0
nop.i 0
}
{.mfi
nop.m 0
// H0=0.5*z0
(p6) fma.s1 f10=f8,f7,f0
nop.i 0;;
}
{.mfi
nop.m 0
// f6=5/2
fma.s1 f6=f7,f1,f6
nop.i 0
}
{.mfi
nop.m 0
// f11=3/2
fma.s1 f11=f7,f1,f1
nop.i 0;;
}
{.mfi
nop.m 0
// d=0.5-S0*H0
(p6) fnma.s1 f7=f12,f10,f7
nop.i 0;;
}
{.mfi
nop.m 0
// P01=d+1
(p6) fma.s1 f10=f1,f7,f1
nop.i 0
}
{.mfi
nop.m 0
// P23=5/2*d+3/2
(p6) fma.s1 f11=f6,f7,f11
nop.i 0;;
}
{.mfi
nop.m 0
// d2=d*d
(p6) fma.s1 f7=f7,f7,f0
nop.i 0;;
}
{.mfi
// Is x^2 + y^2 well less than the overflow
// threshold?
// p7 = exponent below threshold (safe), p8 = needs the overflow check
(p6) cmp.lt.unc p7, p8 = r3,r2
// P=P01+d2*P23
(p6) fma.s1 f10=f7,f11,f10
nop.i 0;;
}
{.mfb
nop.m 0
// S=P*S0
fma.s.s0 f8=f10,f12,f0
// No overflow in this case
(p7) br.ret.sptk b0;;
}
{ .mfi
nop.m 0
(p8) fsetc.s2 0x7F,0x42
// Possible overflow path, must detect by
// Setting widest range exponent with prevailing
// rounding mode.
nop.i 0 ;;
}
{ .mfi
// bias+0x80 (bias+EMAX+1 for single precision)
(p8) mov r2=0x1007f
// S=P*S0
// (recomputed under s2 into f12; f8 already holds the value to return)
(p8) fma.s.s2 f12=f10,f12,f0
nop.i 0 ;;
}
{ .mfi
// f11 = 2^(EMAX+1)
(p8) setf.exp f11 = r2
(p8) fsetc.s2 0x7F,0x40
// Restore Original Mode in S2
nop.i 0 ;;
}
{ .mfi
nop.m 0
// p9 = wide-range result is below 2^(EMAX+1): no overflow
(p8) fcmp.lt.unc.s1 p9, p10 = f12, f11
nop.i 0 ;;
}
{ .mib
nop.m 0
mov GR_Parameter_TAG = 47
// No overflow
(p9) br.ret.sptk b0;;
}
// Overflow: execution falls through into __libm_error_region, which follows
// immediately, with GR_Parameter_TAG = 47.
.endp hypotf
ASM_SIZE_DIRECTIVE(hypotf)
// __libm_error_region (single-precision copy, used by hypotf)
// Entered by falling through from the end of hypotf on overflow, so b0 still
// holds hypotf's return address.
// Builds a 64-byte stack frame, spills FR_X, FR_Y and FR_RESULT into it with
// stfs, calls __libm_error_support with the slot addresses and the tag in the
// output registers, reloads f8 from the result slot and returns.
// Frame layout after `add sp=-64,sp`:
//   [sp+16] = FR_X, [sp+32] = FR_Y, [sp+48] = FR_RESULT
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mii
add GR_Parameter_Y=-32,sp // Address of the Parameter 2 slot (old sp-32 == new sp+32)
// The last bundle of hypotf already sets the tag to 47; it is set again here.
(p0) mov GR_Parameter_TAG = 47
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new 64-byte stack frame
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack; pointer post-increments to sp+48
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
nop.b 0
}
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y // Step back to the Parameter 2 slot (sp+32)
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
nop.m 0
nop.m 0
// Re-derive the result slot address: GR_Parameter_RESULT was overwritten
// by the call (see the 8/15/00 history entry).
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

478
sysdeps/ia64/fpu/e_hypotl.S Normal file
View File

@ -0,0 +1,478 @@
.file "hypotl.asm"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
// Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// *********************************************************************
//
// History:
// 2/02/00 hand-optimized
// 4/04/00 Unwind support added
// 6/20/00 new version
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//
// *********************************************************************
// Function: hypotl(x,y) = sqrt(x^2 + y^2) for double extended values
// x and y
// Also provides cabsl functionality.
//
// *********************************************************************
//
// Resources Used:
//
// Floating-Point Registers: f8 (Input and Return Value)
// f9 (Input)
// f6 -f15, f32-f34
//
// General Purpose Registers:
// r2-r3 (Scratch)
// r32-r35 (Locals)
// r36-r39 (Used to pass arguments to error handling routine)
//
// Predicate Registers: p6 - p10
//
// *********************************************************************
//
// IEEE Special Conditions:
//
// All faults and exceptions should be raised correctly.
// Overflow can occur.
// hypotl(Infinity and anything) = +Infinity
// hypotl(QNaN and anything) = QNaN
// hypotl(SNaN and anything ) = QNaN
//
// *********************************************************************
//
// Implementation:
// x2 = x * x in double-extended
// y2 = y * y in double-extended
// temp = x2 + y2 in double-extended
// sqrt(temp) rounded to double extended
//
// *********************************************************************
#include "libm_support.h"
// Register aliases for hypotl.
// hypotl executes `alloc r32= ar.pfs,0,4,4,0`: r32-r35 are locals and
// r36-r39 are the four output registers.
// NOTE(review): the code that consumes the GR_* aliases lies beyond the part
// of the file reviewed here; the roles below are presumed to match the
// sibling hypot/hypotf files -- confirm.
GR_SAVE_PFS = r33 // presumably ar.pfs saved around the error-support call
GR_SAVE_B0 = r34 // presumably b0 saved around the error-support call
GR_SAVE_GP = r35 // presumably gp saved around the error-support call
GR_Parameter_X = r36 // out0
GR_Parameter_Y = r37 // out1
GR_Parameter_RESULT = r38 // out2
GR_Parameter_TAG = r39 // out3
FR_X = f32 // copy of x taken at entry (fma.s0 f32=f8,f1,f0)
FR_Y = f33 // copy of y taken at entry (fma.s0 f33=f9,f1,f0)
FR_RESULT = f8 // value returned to the caller
// ---------------------------------------------------------------------
// hypotl (aliases: cabsl when _LIBC is not defined; __hypotl and
// __ieee754_hypotl when it is).
//
// In:  f8 = x, f9 = y.        Out: f8 = result.
//
// f32 and f33 receive copies of x and y early on, because f8 is later
// overwritten by frsqrta and f9 by the constant 35/8.
//
// Exits:
//   * x = Inf, or y = 0 with x not a NaN          -> return |x| (from f32)
//   * y = Inf, or x = 0 with y not a NaN          -> return |y| (from f33)
//   * a = x^2+y^2 classified by fclass mask 0x1E7 -> return a
//   * exponent of a below 0x17ffb                 -> return the result
//   * otherwise the result is recomputed in status field 2 and compared
//     with the constant built from exponent 0x13fff; if it is smaller the
//     function returns, else control runs past the final br.ret into the
//     code that follows this procedure (__libm_error_region) with
//     GR_Parameter_TAG = 45.
// ---------------------------------------------------------------------
.section .text
#ifndef _LIBC
.proc cabsl#
.global cabsl#
cabsl:
.endp cabsl
#endif
.proc hypotl#
.global hypotl#
.align 64
hypotl:
#ifdef _LIBC
.global __hypotl
__hypotl:
.global __ieee754_hypotl
__ieee754_hypotl:
#endif
{.mfi
// Register frame: 0 inputs, 4 locals (r32-r35), 4 outputs (r36-r39)
alloc r32= ar.pfs,0,4,4,0
// Compute x*x
fma.s1 f10=f8,f8,f0
// r2=bias-1
mov r2=0xfffe
}
{.mfi
nop.m 0
// y*y
fma.s1 f11=f9,f9,f0
nop.i 0;;
}
{ .mfi
nop.m 0
// Check if x is an Inf - if so return Inf even
// if y is a NaN (C9X)
// p7 = x is +-Inf, p6 = complement
fclass.m.unc p7, p6 = f8, 0x023
nop.i 0
}
{.mfi
nop.m 0
// if possible overflow, copy f8 to f32
// set Denormal, if necessary
// (p8)
fma.s0 f32=f8,f1,f0
nop.i 0;;
}
{ .mfi
nop.m 0
// Check if y is an Inf - if so return Inf even
// if x is a NaN (C9X)
// p8 = y is +-Inf, p9 = complement
fclass.m.unc p8, p9 = f9, 0x023
nop.i 0
}
{ .mfi
nop.m 999
// For x=inf, multiply y by 1 to raise invalid on y an SNaN
// (p7) fma.s0 f9=f9,f1,f0
// copy f9 to f33; set Denormal, if necessary
fma.s0 f33=f9,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
// is y Zero ?
// (only evaluated when x is not Inf; p6 becomes "y is zero")
(p6) fclass.m p6,p0=f9,0x7
nop.i 0;;
}
{.mfi
// f7=0.5
setf.exp f7=r2
// a=x2+y2
fma.s1 f12=f10,f1,f11
nop.i 0
}
{.mfi
// upper half of the single-precision bit pattern loaded into f9 below
mov r2=0x408c //0000
// dx=x*x-x2
fms.s1 f13=f8,f8,f10
nop.i 0;;
}
{.mfi
nop.m 0
// is x Zero ?
// (only evaluated when y is not Inf; p9 becomes "x is zero")
(p9) fclass.m p9,p0=f8,0x7
// r2 = 0x408c0000
shl r2=r2,16
}
{.mfi
nop.m 0
// dy=y*y-y2
fms.s1 f14=f9,f9,f11
nop.i 0;;
}
{.mfi
nop.m 0
// x not NaN ?
// (y is zero here, so p7 selects the "return |x|" exit)
(p6) fclass.m p7,p0=f8,0x3f
nop.i 0
}
{.mfi
nop.m 0
// f6=2
fma.s1 f6=f1,f1,f1
nop.i 0;;
}
{.mfi
nop.m 0
// f34=min(x2,y2)
famin.s1 f34=f10,f11
nop.i 0
}
{.mfb
nop.m 0
// f10=max(x2,y2)
famax.s1 f10=f11,f10
nop.b 0;; //
}
{.mfi
nop.m 0
// y not NaN ?
// (x is zero here, so p8 selects the "return |y|" exit)
(p9) fclass.m p8,p0=f9,0x3f
nop.i 0;;
}
{.mfb
// f9=35/8
// (the input y is no longer available in f9 after this; f33 holds it)
setf.s f9=r2
// if f8=Infinity or f9=Zero, return |f8|
(p7) fmerge.s f8=f0,f32
(p7) br.ret.spnt b0;;
}
{.mfi
nop.m 0
// z0=frsqrta(a)
// (overwrites f8; p6 now qualifies the iteration below)
frsqrta.s1 f8,p6=f12
nop.i 0;;
}
{ .mfi
nop.m 0
// Identify Natvals, Infs, NaNs, and Zeros
// and return result
fclass.m.unc p7, p0 = f12, 0x1E7
nop.i 0
}
{.mfi
// get exponent of x^2+y^2
getf.exp r3=f12
// dxy=dx+dy
fma.s1 f13=f13,f1,f14
nop.i 0;;
}
{.mfb
// 2*emax-2
mov r2=0x17ffb
// if f9=Infinity or f8=Zero, return |f9|
(p8) fmerge.s f8=f0,f33
(p8) br.ret.spnt b0
}
{.mfi
nop.m 0
// dd=a-max(x2,y2)
fnma.s1 f10=f10,f1,f12
nop.i 0;;
}
{.mfi
nop.m 0
// S0=a*z0
(p6) fma.s1 f14=f12,f8,f0
nop.i 0
}
{.mfi
nop.m 0
// H0=0.5*z0
(p6) fma.s1 f15=f8,f7,f0
nop.i 0;;
}
{.mfb
nop.m 0
// if special case, set f8
(p7) mov f8=f12
(p7) br.ret.spnt b0
}
{.mfi
nop.m 0
// da=min(x2,y2)-dd
fnma.s1 f10=f10,f1,f34
nop.i 0;;
}
{.mfi
nop.m 0
// f6=5/2
fma.s1 f6=f7,f1,f6
nop.i 0
}
{.mfi
nop.m 0
// f11=3/2
fma.s1 f11=f7,f1,f1
nop.i 0;;
}
{.mfi
nop.m 0
// d=0.5-S0*H0
(p6) fnma.s1 f7=f14,f15,f7
nop.i 0;;
}
{.mfi
nop.m 0
// P1=3/2*d+1
(p6) fma.s1 f11=f11,f7,f1
nop.i 0
}
{.mfi
nop.m 0
// P2=35/8*d+5/2
(p6) fma.s1 f9=f9,f7,f6
nop.i 0;;
}
{.mfi
nop.m 0
// d2=d*d
(p6) fma.s1 f34=f7,f7,f0
nop.i 0;;
}
{.mfi
nop.m 0
// T0=d*S0
(p6) fma.s1 f6=f7,f14,f0
nop.i 0
}
{.mfi
nop.m 0
// G0=d*H0
(p6) fma.s1 f7=f7,f15,f0
nop.i 0;;
}
{.mfi
nop.m 0
// P=d2*P2+P1
(p6) fma.s1 f11=f34,f9,f11
nop.i 0;;
}
{.mfi
nop.m 0
// S1=p*T0+S0
(p6) fma.s1 f14=f11,f6,f14
nop.i 0
}
{.mfi
nop.m 0
// H1=p*G0+H0
(p6) fma.s1 f15=f11,f7,f15
nop.i 0;;
}
{.mfi
nop.m 0
// e1=a-S1*S1
(p6) fnma.s1 f7=f14,f14,f12
nop.i 0
}
{.mfi
// Is x^2 + y^2 well less than the overflow
// threshold?
// r3 = exponent field of a, r2 = 0x17ffb; p7 = safe, p8 = must check
(p6) cmp.lt.unc p7, p8 = r3,r2
// c=dxy+da
(p6) fma.s1 f13=f13,f1,f10
nop.i 0;;
}
{.mfi
nop.m 0
// e=e1+c
(p6) fma.s1 f13=f7,f1,f13
nop.i 0;;
}
{.mfb
nop.m 0
// S=e*H1+S1
fma.s0 f8=f13,f15,f14
// No overflow in this case
(p7) br.ret.sptk b0;;
}
{ .mfi
nop.m 0
(p8) fsetc.s2 0x7F,0x42
// Possible overflow path, must detect by
// Setting widest range exponent with prevailing
// rounding mode.
nop.i 0 ;;
}
{ .mfi
// bias+0x4000 (bias+EMAX+1)
(p8) mov r2=0x13fff
// S=e*H1+S1
// (same expression as the f8 result, computed under status field 2)
(p8) fma.s2 f12=f13,f15,f14
nop.i 0 ;;
}
{ .mfi
// f11 = comparison threshold built from the exponent in r2
(p8) setf.exp f11 = r2
(p8) fsetc.s2 0x7F,0x40
// Restore Original Mode in S2
nop.i 0 ;;
}
{ .mfi
nop.m 0
// p9 = wide-range result is below the threshold (no overflow)
(p8) fcmp.lt.unc.s1 p9, p10 = f12, f11
nop.i 0 ;;
}
{ .mib
nop.m 0
// Error tag for __libm_error_support; only consumed when the branch
// below is not taken and control continues into the error region.
mov GR_Parameter_TAG = 45;
// No overflow
(p9) br.ret.sptk b0;;
}
.endp hypotl
ASM_SIZE_DIRECTIVE(hypotl)
// ---------------------------------------------------------------------
// __libm_error_region (hypotl): error-reporting tail.
//
// Creates a 64-byte stack frame, stores FR_Y, FR_X and FR_RESULT into it
// with stfe, and passes the addresses of those three slots in
// GR_Parameter_Y, GR_Parameter_X and GR_Parameter_RESULT to
// __libm_error_support (GR_Parameter_TAG is expected to be set already
// by the code that reaches this point).  After the call the value in the
// result slot is loaded back into f8, and sp, b0, gp and ar.pfs are
// restored before returning.
//
// Slot layout relative to the new sp: X at +16, Y at +32, RESULT at +48.
// ---------------------------------------------------------------------
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
// old sp - 32 == new sp + 32 once the frame below is created
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
// post-increment leaves GR_Parameter_Y pointing at new sp + 48
stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
// result slot address = new sp + 48
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
{ .mib
stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
// step GR_Parameter_Y back to the slot holding y (new sp + 32)
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
nop.m 0
nop.m 0
// Recompute the result slot address after the call (see the 8/15/00
// History entry: GR_Parameter_RESULT was previously overwritten).
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

1091
sysdeps/ia64/fpu/e_log.S Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
/* Not needed. */

View File

@ -0,0 +1 @@
/* Not needed. */

View File

@ -0,0 +1 @@
/* Not needed. */

946
sysdeps/ia64/fpu/e_logf.S Normal file
View File

@ -0,0 +1,946 @@
.file "logf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 3/01/00 Initial version
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 1/10/01 Improved speed, fixed flags for neg denormals
//
//
// API
//==============================================================
// float logf(float)
// float log10f(float)
//
// Overview of operation
//==============================================================
// Background
//
// Consider x = 2^N 1.f1 f2 f3 f4...f63
// Log(x) = log(frcpa(x) x/frcpa(x))
// = log(1/frcpa(x)) + log(frcpa(x) x)
// = -log(frcpa(x)) + log(frcpa(x) x)
//
// frcpa(x) = 2^-N frcpa((1.f1 f2 ... f63)
//
// -log(frcpa(x)) = -log(C)
// = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
//
// -log(frcpa(x)) = -log(C)
// = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
//
// -log(frcpa(x)) = -log(C)
// = +Nlog2 + log(1/frcpa(1.f1 f2 ... f63))
//
// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
// Log(x) = +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
// Log(x) = +Nlog2 - log(frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
// Log(x) = +Nlog2 + T + log(frcpa(x) x)
//
// Log(x) = +Nlog2 + T + log(C x)
//
// Cx = 1 + r
//
// Log(x) = +Nlog2 + T + log(1+r)
// Log(x) = +Nlog2 + T + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
//
// 1.f1 f2 ... f8 has 256 entries.
// They are 1 + k/2^8, k = 0 ... 255
// These 256 values are the table entries.
//
// Implementation
//===============
// CASE 1: |x-1| >= 2^-8
// C = frcpa(x)
// r = C * x - 1
//
// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4
//
// x = f * 2^n where f is 1.f_1f_2f_3....f_63
// Nfloat = float(n) where n is the true unbiased exponent
// pre-index = f_1f_2....f_8
// index = pre_index * 8 (each T table entry is an 8-byte double)
// get the dxt table entry at index + offset = T
//
// result = (T + Nfloat * log(2)) + rseries
//
// The T table is calculated as follows
// Form x_k = 1 + k/2^8 where k goes from 0... 255
// y_k = frcpa(x_k)
// log(1/y_k) in quad and round to double
// CASE 2: |x-1| < 2^-8
// w = x - 1
//
// Form wseries = w + Q1*w^2 + Q2*w^3 + Q3*w^4
//
// result = wseries
// Special values
//==============================================================
// log(+0) = -inf
// log(-0) = -inf
// log(+qnan) = +qnan
// log(-qnan) = -qnan
// log(+snan) = +qnan
// log(-snan) = -qnan
// log(-n) = QNAN Indefinite
// log(-inf) = QNAN Indefinite
// log(+inf) = +inf
// Registers used
//==============================================================
// Floating Point registers used:
// f8, input
// f9 -> f15, f32 -> f47
// General registers used:
// r32 -> r51
// Predicate registers used:
// p6 -> p15
// p8 log base e
// p6 log base e special
// p9 used in the frcpa
// p13 log base e large W
// p14 log base e small w
// p7 log base 10
// p10 log base 10 large W
// p11 log base 10 small w
// p12 log base 10 special
#include "libm_support.h"
// Assembly macros
//==============================================================
// Floating-point register aliases.
// Note: log_inv_ln10 used to be assigned twice (f14, then f46).  Every
// use of the alias follows both assignments, so only f46 was ever in
// effect; the dead f14 assignment has been dropped.
log_int_Nfloat        = f9   // unbiased exponent N as an integer (setf.sig)
log_Nfloat            = f10  // N converted to floating point (fcvt.xf)
log_P3                = f11  // polynomial coefficient P3
log_P2                = f12  // polynomial coefficient P2
log_P1                = f13  // polynomial coefficient P1
log_log2              = f15  // log(2)
log_w                 = f32  // w = x - 1
log_T                 = f33  // table value T
log_rp_p32            = f34  // P3*r + P2
log_rp_p2             = f35  // (P3*r + P2)*r^2 + (P1*r + 1)
log_rp_p10            = f36  // P1*r + 1
log_rsq               = f37  // r^2
log_T_plus_Nlog2      = f38  // T + N*log(2)
log_r                 = f39  // r = C*x - 1
log_C                 = f40  // C = frcpa(x)
log_rp_q32            = f41  // P3*w + P2
log_rp_q2             = f42  // (P3*w + P2)*w^2 + (P1*w + 1)
log_rp_q10            = f43  // P1*w + 1
log_wsq               = f44  // w^2
log_Q                 = f45  // natural-log value before scaling (log10 path)
log_inv_ln10          = f46  // 1/ln(10)
log_NORM_f8           = f47  // normalized copy of the input
// ===================================
// General register aliases.
log_GR_exp_17_ones    = r33  // 0x1ffff mask
log_GR_exp_16_ones    = r34  // 0xffff
log_GR_exp_f8         = r35  // exponent field of x (masked)
log_GR_signexp_f8     = r36  // sign and exponent of x (getf.exp)
log_GR_true_exp_f8    = r37  // log_GR_exp_f8 - 0xffff
log_GR_significand_f8 = r38  // significand of x (getf.sig)
log_GR_index          = r39  // T table index
log_AD_1              = r40  // address used to load from log_table_1
log_GR_signexp_w      = r41  // sign and exponent of w (getf.exp)
log_GR_fff7           = r42  // 0xfff7, threshold compared with log_GR_exp_w
log_AD_2              = r43  // address used to load from log_table_2
log_GR_exp_w          = r44  // exponent field of w (masked)
// State saved around the call to __libm_error_support.
GR_SAVE_B0            = r45
GR_SAVE_GP            = r46
GR_SAVE_PFS           = r47
// Arguments of __libm_error_support.
GR_Parameter_X        = r48
GR_Parameter_Y        = r49
GR_Parameter_RESULT   = r50
log_GR_tag            = r51
// Data tables
//==============================================================
// Constant tables (read-only section when built as part of libc).
//
// Layout the code depends on:
//   log_table_1 + 0   P3, P2            (loaded as a pair, ldfpd)
//   log_table_1 + 16  == log_table_2: P1, 1/ln10 (loaded as a pair)
//   log_table_2 + 16  log(2), followed by 8 bytes of padding
//   log_table_2 + 32  first of the 256 T entries, 8 bytes each
// The code reaches log_table_2 by adding 0x10 to the address of
// log_table_1, so the two tables must stay contiguous.
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
log_table_1:
ASM_TYPE_DIRECTIVE(log_table_1,@object)
// P3 multiplies r^4 (value close to -1/4)
data8 0xbfd0001008f39d59 // p3
// P2 multiplies r^3 (value close to 1/3)
data8 0x3fd5556073e0c45a // p2
ASM_SIZE_DIRECTIVE(log_table_1)
log_table_2:
ASM_TYPE_DIRECTIVE(log_table_2,@object)
// P1 multiplies r^2 (value close to -1/2)
data8 0xbfdffffffffaea15 // p1
data8 0x3fdbcb7b1526e50e // 1/ln10
data8 0x3fe62e42fefa39ef // Log(2)
// keeps the T entries that follow at a 16-byte offset from log(2)
data8 0x0 // pad
data8 0x3F60040155D5889E //log(1/frcpa(1+ 0/256)
data8 0x3F78121214586B54 //log(1/frcpa(1+ 1/256)
data8 0x3F841929F96832F0 //log(1/frcpa(1+ 2/256)
data8 0x3F8C317384C75F06 //log(1/frcpa(1+ 3/256)
data8 0x3F91A6B91AC73386 //log(1/frcpa(1+ 4/256)
data8 0x3F95BA9A5D9AC039 //log(1/frcpa(1+ 5/256)
data8 0x3F99D2A8074325F4 //log(1/frcpa(1+ 6/256)
data8 0x3F9D6B2725979802 //log(1/frcpa(1+ 7/256)
data8 0x3FA0C58FA19DFAAA //log(1/frcpa(1+ 8/256)
data8 0x3FA2954C78CBCE1B //log(1/frcpa(1+ 9/256)
data8 0x3FA4A94D2DA96C56 //log(1/frcpa(1+ 10/256)
data8 0x3FA67C94F2D4BB58 //log(1/frcpa(1+ 11/256)
data8 0x3FA85188B630F068 //log(1/frcpa(1+ 12/256)
data8 0x3FAA6B8ABE73AF4C //log(1/frcpa(1+ 13/256)
data8 0x3FAC441E06F72A9E //log(1/frcpa(1+ 14/256)
data8 0x3FAE1E6713606D07 //log(1/frcpa(1+ 15/256)
data8 0x3FAFFA6911AB9301 //log(1/frcpa(1+ 16/256)
data8 0x3FB0EC139C5DA601 //log(1/frcpa(1+ 17/256)
data8 0x3FB1DBD2643D190B //log(1/frcpa(1+ 18/256)
data8 0x3FB2CC7284FE5F1C //log(1/frcpa(1+ 19/256)
data8 0x3FB3BDF5A7D1EE64 //log(1/frcpa(1+ 20/256)
data8 0x3FB4B05D7AA012E0 //log(1/frcpa(1+ 21/256)
data8 0x3FB580DB7CEB5702 //log(1/frcpa(1+ 22/256)
data8 0x3FB674F089365A7A //log(1/frcpa(1+ 23/256)
data8 0x3FB769EF2C6B568D //log(1/frcpa(1+ 24/256)
data8 0x3FB85FD927506A48 //log(1/frcpa(1+ 25/256)
data8 0x3FB9335E5D594989 //log(1/frcpa(1+ 26/256)
data8 0x3FBA2B0220C8E5F5 //log(1/frcpa(1+ 27/256)
data8 0x3FBB0004AC1A86AC //log(1/frcpa(1+ 28/256)
data8 0x3FBBF968769FCA11 //log(1/frcpa(1+ 29/256)
data8 0x3FBCCFEDBFEE13A8 //log(1/frcpa(1+ 30/256)
data8 0x3FBDA727638446A2 //log(1/frcpa(1+ 31/256)
data8 0x3FBEA3257FE10F7A //log(1/frcpa(1+ 32/256)
data8 0x3FBF7BE9FEDBFDE6 //log(1/frcpa(1+ 33/256)
data8 0x3FC02AB352FF25F4 //log(1/frcpa(1+ 34/256)
data8 0x3FC097CE579D204D //log(1/frcpa(1+ 35/256)
data8 0x3FC1178E8227E47C //log(1/frcpa(1+ 36/256)
data8 0x3FC185747DBECF34 //log(1/frcpa(1+ 37/256)
data8 0x3FC1F3B925F25D41 //log(1/frcpa(1+ 38/256)
data8 0x3FC2625D1E6DDF57 //log(1/frcpa(1+ 39/256)
data8 0x3FC2D1610C86813A //log(1/frcpa(1+ 40/256)
data8 0x3FC340C59741142E //log(1/frcpa(1+ 41/256)
data8 0x3FC3B08B6757F2A9 //log(1/frcpa(1+ 42/256)
data8 0x3FC40DFB08378003 //log(1/frcpa(1+ 43/256)
data8 0x3FC47E74E8CA5F7C //log(1/frcpa(1+ 44/256)
data8 0x3FC4EF51F6466DE4 //log(1/frcpa(1+ 45/256)
data8 0x3FC56092E02BA516 //log(1/frcpa(1+ 46/256)
data8 0x3FC5D23857CD74D5 //log(1/frcpa(1+ 47/256)
data8 0x3FC6313A37335D76 //log(1/frcpa(1+ 48/256)
data8 0x3FC6A399DABBD383 //log(1/frcpa(1+ 49/256)
data8 0x3FC70337DD3CE41B //log(1/frcpa(1+ 50/256)
data8 0x3FC77654128F6127 //log(1/frcpa(1+ 51/256)
data8 0x3FC7E9D82A0B022D //log(1/frcpa(1+ 52/256)
data8 0x3FC84A6B759F512F //log(1/frcpa(1+ 53/256)
data8 0x3FC8AB47D5F5A310 //log(1/frcpa(1+ 54/256)
data8 0x3FC91FE49096581B //log(1/frcpa(1+ 55/256)
data8 0x3FC981634011AA75 //log(1/frcpa(1+ 56/256)
data8 0x3FC9F6C407089664 //log(1/frcpa(1+ 57/256)
data8 0x3FCA58E729348F43 //log(1/frcpa(1+ 58/256)
data8 0x3FCABB55C31693AD //log(1/frcpa(1+ 59/256)
data8 0x3FCB1E104919EFD0 //log(1/frcpa(1+ 60/256)
data8 0x3FCB94EE93E367CB //log(1/frcpa(1+ 61/256)
data8 0x3FCBF851C067555F //log(1/frcpa(1+ 62/256)
data8 0x3FCC5C0254BF23A6 //log(1/frcpa(1+ 63/256)
data8 0x3FCCC000C9DB3C52 //log(1/frcpa(1+ 64/256)
data8 0x3FCD244D99C85674 //log(1/frcpa(1+ 65/256)
data8 0x3FCD88E93FB2F450 //log(1/frcpa(1+ 66/256)
data8 0x3FCDEDD437EAEF01 //log(1/frcpa(1+ 67/256)
data8 0x3FCE530EFFE71012 //log(1/frcpa(1+ 68/256)
data8 0x3FCEB89A1648B971 //log(1/frcpa(1+ 69/256)
data8 0x3FCF1E75FADF9BDE //log(1/frcpa(1+ 70/256)
data8 0x3FCF84A32EAD7C35 //log(1/frcpa(1+ 71/256)
data8 0x3FCFEB2233EA07CD //log(1/frcpa(1+ 72/256)
data8 0x3FD028F9C7035C1C //log(1/frcpa(1+ 73/256)
data8 0x3FD05C8BE0D9635A //log(1/frcpa(1+ 74/256)
data8 0x3FD085EB8F8AE797 //log(1/frcpa(1+ 75/256)
data8 0x3FD0B9C8E32D1911 //log(1/frcpa(1+ 76/256)
data8 0x3FD0EDD060B78081 //log(1/frcpa(1+ 77/256)
data8 0x3FD122024CF0063F //log(1/frcpa(1+ 78/256)
data8 0x3FD14BE2927AECD4 //log(1/frcpa(1+ 79/256)
data8 0x3FD180618EF18ADF //log(1/frcpa(1+ 80/256)
data8 0x3FD1B50BBE2FC63B //log(1/frcpa(1+ 81/256)
data8 0x3FD1DF4CC7CF242D //log(1/frcpa(1+ 82/256)
data8 0x3FD214456D0EB8D4 //log(1/frcpa(1+ 83/256)
data8 0x3FD23EC5991EBA49 //log(1/frcpa(1+ 84/256)
data8 0x3FD2740D9F870AFB //log(1/frcpa(1+ 85/256)
data8 0x3FD29ECDABCDFA04 //log(1/frcpa(1+ 86/256)
data8 0x3FD2D46602ADCCEE //log(1/frcpa(1+ 87/256)
data8 0x3FD2FF66B04EA9D4 //log(1/frcpa(1+ 88/256)
data8 0x3FD335504B355A37 //log(1/frcpa(1+ 89/256)
data8 0x3FD360925EC44F5D //log(1/frcpa(1+ 90/256)
data8 0x3FD38BF1C3337E75 //log(1/frcpa(1+ 91/256)
data8 0x3FD3C25277333184 //log(1/frcpa(1+ 92/256)
data8 0x3FD3EDF463C1683E //log(1/frcpa(1+ 93/256)
data8 0x3FD419B423D5E8C7 //log(1/frcpa(1+ 94/256)
data8 0x3FD44591E0539F49 //log(1/frcpa(1+ 95/256)
data8 0x3FD47C9175B6F0AD //log(1/frcpa(1+ 96/256)
data8 0x3FD4A8B341552B09 //log(1/frcpa(1+ 97/256)
data8 0x3FD4D4F3908901A0 //log(1/frcpa(1+ 98/256)
data8 0x3FD501528DA1F968 //log(1/frcpa(1+ 99/256)
data8 0x3FD52DD06347D4F6 //log(1/frcpa(1+ 100/256)
data8 0x3FD55A6D3C7B8A8A //log(1/frcpa(1+ 101/256)
data8 0x3FD5925D2B112A59 //log(1/frcpa(1+ 102/256)
data8 0x3FD5BF406B543DB2 //log(1/frcpa(1+ 103/256)
data8 0x3FD5EC433D5C35AE //log(1/frcpa(1+ 104/256)
data8 0x3FD61965CDB02C1F //log(1/frcpa(1+ 105/256)
data8 0x3FD646A84935B2A2 //log(1/frcpa(1+ 106/256)
data8 0x3FD6740ADD31DE94 //log(1/frcpa(1+ 107/256)
data8 0x3FD6A18DB74A58C5 //log(1/frcpa(1+ 108/256)
data8 0x3FD6CF31058670EC //log(1/frcpa(1+ 109/256)
data8 0x3FD6F180E852F0BA //log(1/frcpa(1+ 110/256)
data8 0x3FD71F5D71B894F0 //log(1/frcpa(1+ 111/256)
data8 0x3FD74D5AEFD66D5C //log(1/frcpa(1+ 112/256)
data8 0x3FD77B79922BD37E //log(1/frcpa(1+ 113/256)
data8 0x3FD7A9B9889F19E2 //log(1/frcpa(1+ 114/256)
data8 0x3FD7D81B037EB6A6 //log(1/frcpa(1+ 115/256)
data8 0x3FD8069E33827231 //log(1/frcpa(1+ 116/256)
data8 0x3FD82996D3EF8BCB //log(1/frcpa(1+ 117/256)
data8 0x3FD85855776DCBFB //log(1/frcpa(1+ 118/256)
data8 0x3FD8873658327CCF //log(1/frcpa(1+ 119/256)
data8 0x3FD8AA75973AB8CF //log(1/frcpa(1+ 120/256)
data8 0x3FD8D992DC8824E5 //log(1/frcpa(1+ 121/256)
data8 0x3FD908D2EA7D9512 //log(1/frcpa(1+ 122/256)
data8 0x3FD92C59E79C0E56 //log(1/frcpa(1+ 123/256)
data8 0x3FD95BD750EE3ED3 //log(1/frcpa(1+ 124/256)
data8 0x3FD98B7811A3EE5B //log(1/frcpa(1+ 125/256)
data8 0x3FD9AF47F33D406C //log(1/frcpa(1+ 126/256)
data8 0x3FD9DF270C1914A8 //log(1/frcpa(1+ 127/256)
data8 0x3FDA0325ED14FDA4 //log(1/frcpa(1+ 128/256)
data8 0x3FDA33440224FA79 //log(1/frcpa(1+ 129/256)
data8 0x3FDA57725E80C383 //log(1/frcpa(1+ 130/256)
data8 0x3FDA87D0165DD199 //log(1/frcpa(1+ 131/256)
data8 0x3FDAAC2E6C03F896 //log(1/frcpa(1+ 132/256)
data8 0x3FDADCCC6FDF6A81 //log(1/frcpa(1+ 133/256)
data8 0x3FDB015B3EB1E790 //log(1/frcpa(1+ 134/256)
data8 0x3FDB323A3A635948 //log(1/frcpa(1+ 135/256)
data8 0x3FDB56FA04462909 //log(1/frcpa(1+ 136/256)
data8 0x3FDB881AA659BC93 //log(1/frcpa(1+ 137/256)
data8 0x3FDBAD0BEF3DB165 //log(1/frcpa(1+ 138/256)
data8 0x3FDBD21297781C2F //log(1/frcpa(1+ 139/256)
data8 0x3FDC039236F08819 //log(1/frcpa(1+ 140/256)
data8 0x3FDC28CB1E4D32FD //log(1/frcpa(1+ 141/256)
data8 0x3FDC4E19B84723C2 //log(1/frcpa(1+ 142/256)
data8 0x3FDC7FF9C74554C9 //log(1/frcpa(1+ 143/256)
data8 0x3FDCA57B64E9DB05 //log(1/frcpa(1+ 144/256)
data8 0x3FDCCB130A5CEBB0 //log(1/frcpa(1+ 145/256)
data8 0x3FDCF0C0D18F326F //log(1/frcpa(1+ 146/256)
data8 0x3FDD232075B5A201 //log(1/frcpa(1+ 147/256)
data8 0x3FDD490246DEFA6B //log(1/frcpa(1+ 148/256)
data8 0x3FDD6EFA918D25CD //log(1/frcpa(1+ 149/256)
data8 0x3FDD9509707AE52F //log(1/frcpa(1+ 150/256)
data8 0x3FDDBB2EFE92C554 //log(1/frcpa(1+ 151/256)
data8 0x3FDDEE2F3445E4AF //log(1/frcpa(1+ 152/256)
data8 0x3FDE148A1A2726CE //log(1/frcpa(1+ 153/256)
data8 0x3FDE3AFC0A49FF40 //log(1/frcpa(1+ 154/256)
data8 0x3FDE6185206D516E //log(1/frcpa(1+ 155/256)
data8 0x3FDE882578823D52 //log(1/frcpa(1+ 156/256)
data8 0x3FDEAEDD2EAC990C //log(1/frcpa(1+ 157/256)
data8 0x3FDED5AC5F436BE3 //log(1/frcpa(1+ 158/256)
data8 0x3FDEFC9326D16AB9 //log(1/frcpa(1+ 159/256)
data8 0x3FDF2391A2157600 //log(1/frcpa(1+ 160/256)
data8 0x3FDF4AA7EE03192D //log(1/frcpa(1+ 161/256)
data8 0x3FDF71D627C30BB0 //log(1/frcpa(1+ 162/256)
data8 0x3FDF991C6CB3B379 //log(1/frcpa(1+ 163/256)
data8 0x3FDFC07ADA69A910 //log(1/frcpa(1+ 164/256)
data8 0x3FDFE7F18EB03D3E //log(1/frcpa(1+ 165/256)
data8 0x3FE007C053C5002E //log(1/frcpa(1+ 166/256)
data8 0x3FE01B942198A5A1 //log(1/frcpa(1+ 167/256)
data8 0x3FE02F74400C64EB //log(1/frcpa(1+ 168/256)
data8 0x3FE04360BE7603AD //log(1/frcpa(1+ 169/256)
data8 0x3FE05759AC47FE34 //log(1/frcpa(1+ 170/256)
data8 0x3FE06B5F1911CF52 //log(1/frcpa(1+ 171/256)
data8 0x3FE078BF0533C568 //log(1/frcpa(1+ 172/256)
data8 0x3FE08CD9687E7B0E //log(1/frcpa(1+ 173/256)
data8 0x3FE0A10074CF9019 //log(1/frcpa(1+ 174/256)
data8 0x3FE0B5343A234477 //log(1/frcpa(1+ 175/256)
data8 0x3FE0C974C89431CE //log(1/frcpa(1+ 176/256)
data8 0x3FE0DDC2305B9886 //log(1/frcpa(1+ 177/256)
data8 0x3FE0EB524BAFC918 //log(1/frcpa(1+ 178/256)
data8 0x3FE0FFB54213A476 //log(1/frcpa(1+ 179/256)
data8 0x3FE114253DA97D9F //log(1/frcpa(1+ 180/256)
data8 0x3FE128A24F1D9AFF //log(1/frcpa(1+ 181/256)
data8 0x3FE1365252BF0865 //log(1/frcpa(1+ 182/256)
data8 0x3FE14AE558B4A92D //log(1/frcpa(1+ 183/256)
data8 0x3FE15F85A19C765B //log(1/frcpa(1+ 184/256)
data8 0x3FE16D4D38C119FA //log(1/frcpa(1+ 185/256)
data8 0x3FE18203C20DD133 //log(1/frcpa(1+ 186/256)
data8 0x3FE196C7BC4B1F3B //log(1/frcpa(1+ 187/256)
data8 0x3FE1A4A738B7A33C //log(1/frcpa(1+ 188/256)
data8 0x3FE1B981C0C9653D //log(1/frcpa(1+ 189/256)
data8 0x3FE1CE69E8BB106B //log(1/frcpa(1+ 190/256)
data8 0x3FE1DC619DE06944 //log(1/frcpa(1+ 191/256)
data8 0x3FE1F160A2AD0DA4 //log(1/frcpa(1+ 192/256)
data8 0x3FE2066D7740737E //log(1/frcpa(1+ 193/256)
data8 0x3FE2147DBA47A394 //log(1/frcpa(1+ 194/256)
data8 0x3FE229A1BC5EBAC3 //log(1/frcpa(1+ 195/256)
data8 0x3FE237C1841A502E //log(1/frcpa(1+ 196/256)
data8 0x3FE24CFCE6F80D9A //log(1/frcpa(1+ 197/256)
data8 0x3FE25B2C55CD5762 //log(1/frcpa(1+ 198/256)
data8 0x3FE2707F4D5F7C41 //log(1/frcpa(1+ 199/256)
data8 0x3FE285E0842CA384 //log(1/frcpa(1+ 200/256)
data8 0x3FE294294708B773 //log(1/frcpa(1+ 201/256)
data8 0x3FE2A9A2670AFF0C //log(1/frcpa(1+ 202/256)
data8 0x3FE2B7FB2C8D1CC1 //log(1/frcpa(1+ 203/256)
data8 0x3FE2C65A6395F5F5 //log(1/frcpa(1+ 204/256)
data8 0x3FE2DBF557B0DF43 //log(1/frcpa(1+ 205/256)
data8 0x3FE2EA64C3F97655 //log(1/frcpa(1+ 206/256)
data8 0x3FE3001823684D73 //log(1/frcpa(1+ 207/256)
data8 0x3FE30E97E9A8B5CD //log(1/frcpa(1+ 208/256)
data8 0x3FE32463EBDD34EA //log(1/frcpa(1+ 209/256)
data8 0x3FE332F4314AD796 //log(1/frcpa(1+ 210/256)
data8 0x3FE348D90E7464D0 //log(1/frcpa(1+ 211/256)
data8 0x3FE35779F8C43D6E //log(1/frcpa(1+ 212/256)
data8 0x3FE36621961A6A99 //log(1/frcpa(1+ 213/256)
data8 0x3FE37C299F3C366A //log(1/frcpa(1+ 214/256)
data8 0x3FE38AE2171976E7 //log(1/frcpa(1+ 215/256)
data8 0x3FE399A157A603E7 //log(1/frcpa(1+ 216/256)
data8 0x3FE3AFCCFE77B9D1 //log(1/frcpa(1+ 217/256)
data8 0x3FE3BE9D503533B5 //log(1/frcpa(1+ 218/256)
data8 0x3FE3CD7480B4A8A3 //log(1/frcpa(1+ 219/256)
data8 0x3FE3E3C43918F76C //log(1/frcpa(1+ 220/256)
data8 0x3FE3F2ACB27ED6C7 //log(1/frcpa(1+ 221/256)
data8 0x3FE4019C2125CA93 //log(1/frcpa(1+ 222/256)
data8 0x3FE4181061389722 //log(1/frcpa(1+ 223/256)
data8 0x3FE42711518DF545 //log(1/frcpa(1+ 224/256)
data8 0x3FE436194E12B6BF //log(1/frcpa(1+ 225/256)
data8 0x3FE445285D68EA69 //log(1/frcpa(1+ 226/256)
data8 0x3FE45BCC464C893A //log(1/frcpa(1+ 227/256)
data8 0x3FE46AED21F117FC //log(1/frcpa(1+ 228/256)
data8 0x3FE47A1527E8A2D3 //log(1/frcpa(1+ 229/256)
data8 0x3FE489445EFFFCCC //log(1/frcpa(1+ 230/256)
data8 0x3FE4A018BCB69835 //log(1/frcpa(1+ 231/256)
data8 0x3FE4AF5A0C9D65D7 //log(1/frcpa(1+ 232/256)
data8 0x3FE4BEA2A5BDBE87 //log(1/frcpa(1+ 233/256)
data8 0x3FE4CDF28F10AC46 //log(1/frcpa(1+ 234/256)
data8 0x3FE4DD49CF994058 //log(1/frcpa(1+ 235/256)
data8 0x3FE4ECA86E64A684 //log(1/frcpa(1+ 236/256)
data8 0x3FE503C43CD8EB68 //log(1/frcpa(1+ 237/256)
data8 0x3FE513356667FC57 //log(1/frcpa(1+ 238/256)
data8 0x3FE522AE0738A3D8 //log(1/frcpa(1+ 239/256)
data8 0x3FE5322E26867857 //log(1/frcpa(1+ 240/256)
data8 0x3FE541B5CB979809 //log(1/frcpa(1+ 241/256)
data8 0x3FE55144FDBCBD62 //log(1/frcpa(1+ 242/256)
data8 0x3FE560DBC45153C7 //log(1/frcpa(1+ 243/256)
data8 0x3FE5707A26BB8C66 //log(1/frcpa(1+ 244/256)
data8 0x3FE587F60ED5B900 //log(1/frcpa(1+ 245/256)
data8 0x3FE597A7977C8F31 //log(1/frcpa(1+ 246/256)
data8 0x3FE5A760D634BB8B //log(1/frcpa(1+ 247/256)
data8 0x3FE5B721D295F10F //log(1/frcpa(1+ 248/256)
data8 0x3FE5C6EA94431EF9 //log(1/frcpa(1+ 249/256)
data8 0x3FE5D6BB22EA86F6 //log(1/frcpa(1+ 250/256)
data8 0x3FE5E6938645D390 //log(1/frcpa(1+ 251/256)
data8 0x3FE5F673C61A2ED2 //log(1/frcpa(1+ 252/256)
data8 0x3FE6065BEA385926 //log(1/frcpa(1+ 253/256)
data8 0x3FE6164BFA7CC06B //log(1/frcpa(1+ 254/256)
data8 0x3FE62643FECF9743 //log(1/frcpa(1+ 255/256)
ASM_SIZE_DIRECTIVE(log_table_2)
.align 32
.global logf#
.global log10f#
// log10 has p7 true, p8 false
// log has p8 true, p7 false
// ---------------------------------------------------------------------
// log10f (alias __ieee754_log10f when _LIBC is defined).
// In: f8 = x.
// Sets p7 (and clears p8) to select the base-10 path, starts the same
// three operations as the logf entry (frcpa, table address, fnorm) and
// branches to the shared code at L(LOG_LOG10_X) inside logf.
// ---------------------------------------------------------------------
.section .text
.proc log10f#
.align 32
log10f:
#ifdef _LIBC
.global __ieee754_log10f
.type __ieee754_log10f,@function
__ieee754_log10f:
#endif
{ .mfi
// Register frame: 1 input, 15 locals, 4 outputs (r32-r51 in total)
alloc r32=ar.pfs,1,15,4,0
// C = reciprocal approximation of x
frcpa.s1 log_C,p9 = f1,f8
// r0 == r0 always holds: p7 = 1, p8 = 0
cmp.eq.unc p7,p8 = r0, r0
}
{ .mfb
// address of the linkage-table entry for log_table_1 (dereferenced later)
addl log_AD_1 = @ltoff(log_table_1), gp
fnorm.s1 log_NORM_f8 = f8
br.sptk L(LOG_LOG10_X)
}
;;
.endp log10f
ASM_SIZE_DIRECTIVE(log10f)
ASM_SIZE_DIRECTIVE(__ieee754_log10f)
// ---------------------------------------------------------------------
// logf (alias __ieee754_logf when _LIBC is defined).
// In: f8 = x.
// Sets p8 (and clears p7) to select the natural-log path, then falls
// through into the code shared with log10f at L(LOG_LOG10_X).
// ---------------------------------------------------------------------
.section .text
.proc logf#
.align 32
logf:
#ifdef _LIBC
.global __ieee754_logf
.type __ieee754_logf,@function
__ieee754_logf:
#endif
{ .mfi
// Register frame: 1 input, 15 locals, 4 outputs (r32-r51 in total)
alloc r32=ar.pfs,1,15,4,0
// C = reciprocal approximation of x
frcpa.s1 log_C,p9 = f1,f8
// r0 == r0 always holds: p8 = 1, p7 = 0
cmp.eq.unc p8,p7 = r0, r0
}
{ .mfi
// address of the linkage-table entry for log_table_1 (dereferenced later)
addl log_AD_1 = @ltoff(log_table_1), gp
fnorm.s1 log_NORM_f8 = f8
nop.i 999
}
;;
// Shared by logf and log10f (p8 / p7 record which one was called).
// Extracts the sign+exponent and significand of x, loads the table
// address, forms w = x - 1, and diverts unnormal inputs to L(LOG_DENORM),
// where exponent and significand are re-read from the normalized copy.
L(LOG_LOG10_X):
{ .mfi
getf.exp log_GR_signexp_f8 = f8 // If x unorm then must recompute
fclass.m.unc p15,p0 = f8, 0x0b // Test for x=unorm
// threshold later compared against the exponent of w
mov log_GR_fff7 = 0xfff7
}
{ .mfi
// fetch the address of log_table_1 from the linkage table
ld8 log_AD_1 = [log_AD_1]
// w = x - 1
fms.s1 log_w = f8,f1,f1
mov log_GR_exp_17_ones = 0x1ffff
}
;;
{ .mmi
getf.sig log_GR_significand_f8 = f8 // If x unorm then must recompute
mov log_GR_exp_16_ones = 0xffff
nop.i 999
}
;;
{ .mmb
// log_AD_2 = address of log_table_2 (16 bytes after log_table_1)
adds log_AD_2 = 0x10, log_AD_1
// keep the 17 exponent bits of the getf.exp value
and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
(p15) br.cond.spnt L(LOG_DENORM)
}
;;
// Main path, entered by fall-through or from L(LOG_DENORM).
// On entry log_GR_exp_f8 / log_GR_significand_f8 describe x, log_AD_1
// and log_AD_2 address the two tables, log_w = x - 1 and log_C = frcpa(x).
//
// Early exits: NaN (quieted), +Inf (returned unchanged), x == 1.0
// (returns +0); x = 0 or negative x branch to L(LOG_ZERO_NEG).
// Otherwise two candidate polynomials are evaluated, one in r = C*x - 1
// and one in w, and the exponent of w compared with 0xfff7 selects
// which one is used:
//   p13 / p14 : logf,   large / small w  -> result written to f8
//   p10 / p11 : log10f, large / small w  -> log_Q, then f8 = log_Q/ln10
L(LOG_COMMON):
{.mfi
ldfpd log_P3,log_P2 = [log_AD_1],16
fclass.m.unc p6,p0 = f8, 0xc3 // Test for x=nan
// shift out the top bit of the significand ...
shl log_GR_index = log_GR_significand_f8,1
}
{.mfi
// unbiased exponent N
sub log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
nop.f 999
nop.i 999
}
;;
{ .mfi
ldfpd log_P1,log_inv_ln10 = [log_AD_2],16
fclass.m.unc p11,p0 = f8, 0x21 // Test for x=+inf
// ... and keep the next 8 bits as the table index
shr.u log_GR_index = log_GR_index,56
}
{ .mfi
setf.sig log_int_Nfloat = log_GR_true_exp_f8
nop.f 999
nop.i 999
}
;;
{ .mfi
// after this post-increment log_AD_2 addresses the first T entry
ldfd log_log2 = [log_AD_2],16
fma.s1 log_wsq = log_w, log_w, f0
nop.i 999
}
{ .mfb
nop.m 999
(p6) fma.s.s0 f8 = f8,f1,f0 // quietize nan result if x=nan
(p6) br.ret.spnt b0 // Exit for x=nan
}
;;
{ .mfi
// address of T[index] (8 bytes per entry)
shladd log_AD_2 = log_GR_index,3,log_AD_2
fcmp.eq.s1 p10,p0 = log_NORM_f8, f1 // Test for x=+1.0
nop.i 999
}
{ .mfb
nop.m 999
// r = C*x - 1
fms.s1 log_r = log_C,f8,f1
(p11) br.ret.spnt b0 // Exit for x=+inf
}
;;
{ .mmf
nop.m 999
nop.m 999
fclass.m.unc p6,p0 = f8, 0x07 // Test for x=0
}
;;
{ .mfb
ldfd log_T = [log_AD_2]
(p10) fmerge.s f8 = f0, f0
(p10) br.ret.spnt b0 // Exit for x=1.0
;;
}
{ .mfi
getf.exp log_GR_signexp_w = log_w
fclass.m.unc p12,p0 = f8, 0x3a // Test for x neg norm, unorm, inf
nop.i 999
}
;;
{ .mmb
nop.m 999
nop.m 999
(p6) br.cond.spnt L(LOG_ZERO_NEG) // Branch if x=0
;;
}
{ .mfi
// exponent field of w, compared with 0xfff7 further down
and log_GR_exp_w = log_GR_exp_17_ones, log_GR_signexp_w
nop.f 999
nop.i 999
}
{ .mfb
nop.m 999
fma.s1 log_rsq = log_r, log_r, f0
(p12) br.cond.spnt L(LOG_ZERO_NEG) // Branch if x<0
;;
}
{ .mfi
nop.m 999
// P3*r + P2
fma.s1 log_rp_p32 = log_P3, log_r, log_P2
nop.i 999
}
{ .mfi
nop.m 999
// P3*w + P2 (the w polynomial uses the same coefficients)
fma.s1 log_rp_q32 = log_P3, log_w, log_P2
nop.i 999
;;
}
{ .mfi
nop.m 999
// N as a floating-point value
fcvt.xf log_Nfloat = log_int_Nfloat
nop.i 999 ;;
}
{ .mfi
nop.m 999
// P1*r + 1
fma.s1 log_rp_p10 = log_P1, log_r, f1
nop.i 999
}
{ .mfi
nop.m 999
// P1*w + 1
fma.s1 log_rp_q10 = log_P1, log_w, f1
nop.i 999
;;
}
// p13 <== large w log
// p14 <== small w log
{ .mfi
(p8) cmp.ge.unc p13,p14 = log_GR_exp_w, log_GR_fff7
fcmp.eq.s0 p6,p0 = f8,f0 // Sets flag on +denormal input
nop.i 999
;;
}
// p10 <== large w log10
// p11 <== small w log10
{ .mfi
(p7) cmp.ge.unc p10,p11 = log_GR_exp_w, log_GR_fff7
nop.f 999
nop.i 999 ;;
}
{ .mfi
nop.m 999
// T + N*log(2)
fma.s1 log_T_plus_Nlog2 = log_Nfloat,log_log2, log_T
nop.i 999 ;;
}
{ .mfi
nop.m 999
// (P3*r + P2)*r^2 + (P1*r + 1)
fma.s1 log_rp_p2 = log_rp_p32, log_rsq, log_rp_p10
nop.i 999
}
{ .mfi
nop.m 999
// (P3*w + P2)*w^2 + (P1*w + 1)
fma.s1 log_rp_q2 = log_rp_q32, log_wsq, log_rp_q10
nop.i 999
;;
}
// small w, log <== p14
{ .mfi
nop.m 999
(p14) fma.s f8 = log_rp_q2, log_w, f0
nop.i 999
}
{ .mfi
nop.m 999
(p11) fma.s1 log_Q = log_rp_q2, log_w, f0
nop.i 999 ;;
}
// large w, log <== p13
.pred.rel "mutex",p13,p10
{ .mfi
nop.m 999
(p13) fma.s f8 = log_rp_p2, log_r, log_T_plus_Nlog2
nop.i 999
}
{ .mfi
nop.m 999
(p10) fma.s1 log_Q = log_rp_p2, log_r, log_T_plus_Nlog2
nop.i 999 ;;
}
// log10
{ .mfb
nop.m 999
// log10 result = log_Q * (1/ln10); the return below serves both paths
(p7) fma.s f8 = log_inv_ln10,log_Q,f0
br.ret.sptk b0
;;
}
// Unnormal input: the exponent and significand taken directly from f8
// are not usable, so re-read both from the normalized copy
// log_NORM_f8, redo the exponent masking, and rejoin the main path.
L(LOG_DENORM):
{ .mmi
getf.exp log_GR_signexp_f8 = log_NORM_f8
nop.m 999
nop.i 999
}
;;
{ .mmb
getf.sig log_GR_significand_f8 = log_NORM_f8
and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
br.cond.sptk L(LOG_COMMON)
}
;;
// Error path for x = +-0 and for negative x (norm, unorm, inf).
// Saves x in f10 (stored later as parameter 1 by __libm_error_region),
// builds the default result in f8 with frcpa, selects the error tag,
// and branches to __libm_error_region.
//
//   case                 predicate   f8 built from      tag
//   logf(+-0)            p9          frcpa(-1.0, 0)      4
//   log10f(+-0)          p10         frcpa(-1.0, 0)     10
//   logf(negative)       p13         frcpa(0, 0)         5
//   log10f(negative)     p14         frcpa(0, 0)        11
L(LOG_ZERO_NEG):
// qnan snan inf norm unorm 0 -+
// 0 0 0 0 0 1 11 0x7
// 0 0 1 1 1 0 10 0x3a
// Save x (f8) in f10
{ .mfi
nop.m 999
fmerge.s f10 = f8,f8
nop.i 999 ;;
}
// p8 p9 means ln(+-0) = -inf
// p7 p10 means log(+-0) = -inf
// p13 means ln(-)
// p14 means log(-)
{ .mfi
nop.m 999
fmerge.ns f6 = f1,f1 // Form -1.0
nop.i 999 ;;
}
// p9 means ln(+-0) = -inf
// p10 means log(+-0) = -inf
// Log(+-0) = -inf
{ .mfi
nop.m 999
(p8) fclass.m.unc p9,p0 = f10, 0x07
nop.i 999
}
{ .mfi
nop.m 999
(p7) fclass.m.unc p10,p0 = f10, 0x07
nop.i 999 ;;
}
// p13 ln(-)
// p14 log(-)
// Log(-inf, -normal, -unnormal) = QNAN indefinite
{ .mfi
nop.m 999
(p8) fclass.m.unc p13,p0 = f10, 0x3a
nop.i 999
}
{ .mfi
nop.m 999
(p7) fclass.m.unc p14,p0 = f10, 0x3a
nop.i 999 ;;
}
.pred.rel "mutex",p9,p10
{ .mfi
(p9) mov log_GR_tag = 4
// -1.0 / 0: the frcpa result in f8 is the value passed on as the result
(p9) frcpa f8,p11 = f6,f0
nop.i 999
}
{ .mfi
(p10) mov log_GR_tag = 10
(p10) frcpa f8,p12 = f6,f0
nop.i 999 ;;
}
.pred.rel "mutex",p13,p14
{ .mfi
(p13) mov log_GR_tag = 5
// 0 / 0: the frcpa result in f8 is the value passed on as the result
(p13) frcpa f8,p11 = f0,f0
nop.i 999
}
{ .mfb
(p14) mov log_GR_tag = 11
(p14) frcpa f8,p12 = f0,f0
br.cond.sptk __libm_error_region ;;
}
.endp logf
ASM_SIZE_DIRECTIVE(logf)
ASM_SIZE_DIRECTIVE(__ieee754_logf)
// Stack operations when calling error support.
// (1) (2) (3) (call) (4)
// sp -> + psp -> + psp -> + sp -> +
// | | | |
// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
// | | | |
// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
// | | | |
// | | <- GR_X X1 ->| |
// | | | |
// sp-64 -> + sp -> + sp -> + +
// save ar.pfs save b0 restore gp
// save gp restore ar.pfs
// __libm_error_region (logf / log10f): error-reporting tail, reached by
// the branch at the end of L(LOG_ZERO_NEG) with the tag already in
// log_GR_tag, the saved input in f10 and the default result in f8.
// Creates a 64-byte frame, stores f1, f10 and f8 as single-precision
// values in the Y, X and RESULT slots, passes the slot addresses to
// __libm_error_support, then reloads f8 from the RESULT slot and returns.
// Slot layout relative to the new sp: X at +16, Y at +32, RESULT at +48.
.proc __libm_error_region
__libm_error_region:
.prologue
// (1)
{ .mfi
// old sp - 32 == new sp + 32 once the frame below is created
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// (2)
{ .mmi
// the value stored as parameter 2 is the constant 1.0 (f1);
// post-increment leaves GR_Parameter_Y pointing at new sp + 48
stfs [GR_Parameter_Y] = f1,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
// (3)
{ .mib
// f10 holds the original input saved at L(LOG_ZERO_NEG)
stfs [GR_Parameter_X] = f10 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
// step GR_Parameter_Y back to the parameter 2 slot (new sp + 32)
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
nop.m 0
nop.m 0
// Recompute the result slot address after the call (see the 8/15/00
// History entry: GR_Parameter_RESULT was previously overwritten).
add GR_Parameter_RESULT = 48,sp
};;
// (4)
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -0,0 +1 @@
/* Not needed. */

2309
sysdeps/ia64/fpu/e_pow.S Normal file

File diff suppressed because it is too large Load Diff

2309
sysdeps/ia64/fpu/e_powf.S Normal file

File diff suppressed because it is too large Load Diff

3437
sysdeps/ia64/fpu/e_powl.S Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
/* Not needed. */

View File

@ -0,0 +1 @@
/* Not needed. */

View File

@ -0,0 +1,592 @@
.file "remainder.asm"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, Bob Norin,
// Shane Story, and Ping Tak Peter Tang of the Computational Software Lab,
// Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//====================================================================
// 2/02/00 Initial version
// 3/02/00 New Algorithm
// 4/04/00 Unwind support added
// 7/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//11/29/00 Set FR_Y to f9
//
// API
//====================================================================
// double remainder(double,double);
//
// Overview of operation
//====================================================================
// remainder(a,b)=a-i*b,
// where i is an integer such that, if b!=0 and a is finite,
// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
//
// Algorithm
//====================================================================
// a). eliminate special cases
// b). if |a/b|<0.25 (first quotient estimate), return a
// c). use single precision divide algorithm to get quotient q
// rounded to 24 bits of precision
// d). calculate partial remainders (using both q and q-ulp);
// select one and RZ(a/b) based on the sign of |a|-|b|*q
// e). if the exponent difference (exponent(a)-exponent(b))
// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
// and sticky bits to round to integer; exit loop and
// calculate final remainder
// f). if exponent(a)-exponent(b)>=24, select new value of a as
// the partial remainder calculated using RZ(a/b);
// repeat from c).
//
// Special cases
//====================================================================
// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
// a=NaN or b=NaN: return NaN
#include "libm_support.h"
// Registers used
//====================================================================
// Predicate registers: p6-p14
// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r40
// Floating point registers: f6-f15,f32
.section .text
// Symbolic register names used by remainder and by the
// __libm_error_region stub that follows it.
// General registers r33-r40 belong to the frame created by
// "alloc r32=ar.pfs,1,4,4,0"; r37-r40 are its four output registers.
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
// NOTE(review): GR_SAVE_SP is defined but not referenced in this file.
GR_SAVE_SP = r36
GR_Parameter_X = r37
GR_Parameter_Y = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
// Floating-point values that __libm_error_region spills to the stack
FR_X = f10
FR_Y = f9
FR_RESULT = f8
// double remainder(double a, double b)
// Entry sequence: takes |a| and |b|, loads the constants used by the
// loop, classifies both operands and branches away for special values.
// On fall-through into remloop24: f13=|a|, f14=|b|, f10=y0, f12=0,
// f32=E, r3=bit pattern of 2^{23}, p7=1 (first pass), p6 from frcpa.
.proc remainder#
.align 32
.global remainder#
.align 32
remainder:
#ifdef _LIBC
.global __remainder
.type __remainder,@function
__remainder:
#endif
// inputs in f8, f9
// result in f8
{ .mfi
// register frame r32-r40; r32 receives the caller's ar.pfs
alloc r32=ar.pfs,1,4,4,0
// f13=|a|
fmerge.s f13=f0,f8
nop.i 0
}
{.mfi
nop.m 0
// f14=|b|
fmerge.s f14=f0,f9
nop.i 0;;
}
{.mlx
// r28=sign/exponent image of E (loaded into f32 by setf.exp below);
// 0x2ffdd = sign bit set, biased exponent 0xffdd (bias-34)
mov r28=0x2ffdd
// r3=2^{23} (single precision bit pattern)
movl r3=0x4b000000;;
}
// Y +-NAN, +-inf, +-0? p11
// (mask 0xe7 = mask 0xe3 shown below plus the zero bit)
{ .mfi
// f32=E, the correction term added in Step (4)
setf.exp f32=r28
(p0) fclass.m.unc p11,p0 = f9, 0xe7
nop.i 999
}
// fclass mask bits for 0xe3:
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999;;
}
{.mfi
nop.m 0
// f12=0: value of q0 if the (p6) steps in the loop are skipped
mov f12=f0
nop.i 0
}
{ .mfi
// set p7=1 (enables the first-pass |a/b|<1/4 test in the loop)
cmp.eq.unc p7,p0=r0,r0
// Step (1)
// y0 = 1 / b in f10
frcpa.s1 f10,p6=f13,f14
nop.i 0;;
}
{.bbb
// special operands are handled out of line
(p9) br.cond.spnt L(FREM_X_NAN_INF)
(p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
nop.b 0
} {.mfi
nop.m 0
// set D flag if a (f8) is denormal
fnma.s0 f6=f8,f1,f8
nop.i 0;;
}
// Main reduction loop. Each pass computes a 24-bit quotient estimate q
// of |a|/|b| (f13/f14), forms partial remainders with q and q-ulp, and
// either loops with the partial remainder as the new |a| (p12 set) or
// falls through to the final rounding code after the branch.
// The |a/b|<1/4 early return is only taken on the first pass (p7).
L(remloop24):
{ .mfi
nop.m 0
// Step (2)
// q0 = a * y0 in f12
(p6) fma.s1 f12=f13,f10,f0
nop.i 0
} { .mfi
nop.m 0
// Step (3)
// e0 = 1 - b * y0 in f7
(p6) fnma.s1 f7=f14,f10,f1
nop.i 0;;
} {.mlx
nop.m 0
// r2=1.25*2^{-24} (single precision bit pattern)
movl r2=0x33a00000;;
}
{.mfi
nop.m 0
// q1=q0*(1+e0) in f15
fma.s1 f15=f12,f7,f12
nop.i 0
}
{ .mfi
nop.m 0
// Step (4)
// e1 = e0 * e0 + E in f7
(p6) fma.s1 f7=f7,f7,f32
nop.i 0;;
}
{.mii
// first pass only: r29=sign/exponent field of q0, r28=bias-2
(p7) getf.exp r29=f12
(p7) mov r28=0xfffd
nop.i 0;;
}
{ .mfi
// f12=2^{23}
setf.s f12=r3
// Step (5)
// q2 = q1 + e1 * q1 in f11 (rounded to single precision)
(p6) fma.s.s1 f11=f7,f15,f15
nop.i 0
} { .mfi
nop.m 0
// Step (6)
// q2 = q1 + e1 * q1 in f6 (same product, without the .s rounding)
(p6) fma.s1 f6=f7,f15,f15
nop.i 0;;
}
{.mmi
// f15=1.25*2^{-24}
setf.s f15=r2
// q<1/4 ? (i.e. expon< -2)
// p7 stays set only if the first-pass test succeeds; on later
// passes p7 is already 0 and this compare does not execute
(p7) cmp.gt p7,p0=r28,r29
nop.i 0;;
}
{.mfb
// r29= -32+bias
mov r29=0xffdf
// if |a/b|<1/4, set D flag before returning
(p7) fma.d.s0 f9=f9,f0,f8
nop.b 0;;
}
{.mfb
nop.m 0
// can be combined with bundle above if sign of 0 or
// FTZ enabled are not important
(p7) fmerge.s f8=f8,f9
// return if |a|<|b|/4 (estimated quotient < 1/4)
(p7) br.ret.spnt b0;;
}
{.mfi
// f7=2^{-32}
setf.exp f7=r29
// set f8 to current a value | sign
fmerge.s f8=f8,f13
nop.i 0;;
}
{.mfi
// r28=sign/exponent field of the quotient estimate in f6
getf.exp r28=f6
// last step ? (q<2^{23}); p12=1 means another pass is needed
fcmp.lt.unc.s1 p0,p12=f6,f12
nop.i 0;;
}
{.mfi
nop.m 0
// r=a-b*q in f6
fnma.s1 f6=f14,f11,f13
nop.i 0
} {.mfi
// GR r2=23+bias
mov r2=0xffff+23
// q'=q-q*(1.25*2^{-24}) (q'=q-ulp) in f15
fnma.s.s1 f15=f11,f15,f11
nop.i 0;;
}
{.mmi
nop.m 0
// p11: exponent of quotient estimate is exactly 23; p14: otherwise
cmp.eq p11,p14=r2,r28
nop.i 0;;
}
.pred.rel "mutex",p11,p14
{.mfi
nop.m 0
// if exponent of q is 23, then second remainder=a-b*2^{23}
(p11) fnma.s1 f13=f12,f14,f13
nop.i 0
}
{.mfi
nop.m 0
// otherwise second remainder=a-b*q' (kept in f13)
(p14) fnma.s1 f13=f14,f15,f13
nop.i 0;;
}
{.mfi
nop.m 0
// r>0 iff q=RZ(a/b) and inexact
fcmp.gt.unc.s1 p8,p0=f6,f0
nop.i 0
} {.mfi
nop.m 0
// r<0 iff q'=RZ(a/b) and inexact
(p14) fcmp.lt.unc.s1 p9,p10=f6,f0
nop.i 0;;
}
.pred.rel "mutex",p8,p9
{.mfi
nop.m 0
// (p8) Q=q+(last iteration ? sticky bits:0)
// i.e. Q=q+q*x (x=2^{-32} or 0)
(p8) fma.s1 f11=f11,f7,f11
nop.i 0
} {.mfi
nop.m 0
// (p9) Q=q'+(last iteration ? sticky bits:0)
// i.e. Q=q'+q'*x (x=2^{-32} or 0)
(p9) fma.s1 f11=f15,f7,f15
nop.i 0;;
}
{.mfb
nop.m 0
// (p9) the second remainder already in f13 is the new a
// (p10) new a = r (f6)
(p10) mov f13=f6
// not the last step: iterate with the reduced |a| in f13
(p12) br.cond.sptk L(remloop24);;
}
// last iteration
{.mfi
nop.m 0
// set f9=|b|*sgn(a)
fmerge.s f9=f8,f9
nop.i 0
}
{.mfi
nop.m 0
// round to integer
fcvt.fx.s1 f11=f11
nop.i 0;;
}
{.mfi
nop.m 0
// save sign of a
fmerge.s f7=f8,f8
nop.i 0
} {.mfi
nop.m 0
// normalize (convert the integer back to floating point)
fcvt.xf f11=f11
nop.i 0;;
}
{.mfi
nop.m 0
// This can be removed if sign of 0 is not important
// get remainder using sf1 (copy in f12 is only used for the zero test)
fnma.d.s1 f12=f9,f11,f8
nop.i 0
}
{.mfi
nop.m 0
// get remainder (result in f8, computed with status field 0)
fnma.d.s0 f8=f9,f11,f8
nop.i 0;;
}
{.mfi
nop.m 0
// f12=0?
// This can be removed if sign of 0 is not important
fcmp.eq.unc.s1 p8,p0=f12,f0
nop.i 0;;
}
{.mfb
nop.m 0
// if f8=0, set sign correctly (sign of a, saved in f7)
// This can be removed if sign of 0 is not important
(p8) fmerge.s f8=f7,f8
// return
br.ret.sptk b0;;
}
// Reached when X (f8) is a NaN or an infinity (fclass mask 0xe3).
// Y zero is forwarded to FREM_Y_ZERO; X infinite produces a NaN result
// and goes through the error handler; otherwise a NaN is returned.
L(FREM_X_NAN_INF):
// Y zero ?
{.mfi
nop.m 0
// f10=Y*1+0, so the compare below operates on a copy of Y
fma.s1 f10=f9,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
nop.i 0;;
}
{.mib
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt L(FREM_Y_ZERO);;
}
// X infinity? Return QNAN indefinite
// p8 = X is +/-Inf (refined below to "and Y is not a NaN")
{ .mfi
nop.m 999
(p0) fclass.m.unc p8,p0 = f8, 0x23
nop.i 999
}
// Same test again into p11: p11 = X is +/-Inf (selects the error path)
{ .mfi
nop.m 999
(p0) fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
// second target is set when Y is NOT a NaN, so p8 = X Inf and Y not NaN
{.mfi
nop.m 999
(p8) fclass.m.unc p0,p8=f9,0xc3
nop.i 0;;
}
{.mfi
nop.m 999
// also set Denormal flag if necessary
(p8) fma.s0 f9=f9,f1,f0
nop.i 0
}
{ .mfi
nop.m 999
// X/X with X infinite: produces the NaN result in f8
(p8) frcpa.s0 f8,p7 = f8,f8
nop.i 999 ;;
}
{.mfi
nop.m 999
// f10 (FR_X) = f8 for the error handler
// NOTE(review): when p8 is set, f8 was already overwritten by the
// frcpa above, so FR_X receives that result, not the original X.
(p11) mov f10=f8
nop.i 0
}
{ .mfi
nop.m 999
(p8) fma.d f8=f8,f1,f0
nop.i 0 ;;
}
{ .mfb
nop.m 999
// executed on every path that reaches this point; result in f8
frcpa.s0 f8,p7=f8,f9
// X infinite: report through __libm_error_region
(p11) br.cond.spnt L(EXP_ERROR_RETURN);;
}
{ .mib
nop.m 0
nop.i 0
// X NaN: return f8 directly
br.ret.spnt b0 ;;
}
// Reached when Y (f9) is a NaN, an infinity or a zero (fclass mask 0xe7)
// and X did not take the FREM_X_NAN_INF branch.
// Y infinite: return X. Y NaN: return Y. Otherwise fall through to
// FREM_Y_ZERO.
L(FREM_Y_NAN_INF_ZERO):
// Y INF
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
// result = X (rounded to double)
(p7) fma.d f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
// result = Y (the NaN operand)
(p9) fma.d f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
// Y is zero. Builds the NaN result in f8, copies X into f10 (FR_X) and
// falls through into EXP_ERROR_RETURN.
L(FREM_Y_ZERO):
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
// X NAN?
// p9 = X is a NaN, p10 = X is not a NaN
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
// X not a NaN: p9 = X matches none of the classes in mask 0xff,
// p10 = X matches one of them
{ .mfi
nop.m 999
(p10) fclass.nm p9,p10 = f8, 0xff
nop.i 999 ;;
}
{.mfi
nop.m 999
// p9: f11 = result of X/0
(p9) frcpa f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
// p10: f11 = result of 0/0
(p10) frcpa f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
// f10 (FR_X) = X, read before f8 is overwritten below
(p0) fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
// result = f11 (rounded to double)
(p0) fma.d f8=f11,f1,f0
nop.i 999
}
// Common exit for the error cases: load the error tag into the fourth
// output register and branch (not call) to __libm_error_region, which
// returns to remainder's caller through b0.
L(EXP_ERROR_RETURN):
{ .mib
// 124 is the tag handed to __libm_error_support for this function
// (presumably defined alongside libm_support.h -- confirm)
(p0) mov GR_Parameter_TAG = 124
nop.i 999
(p0) br.sptk __libm_error_region;;
}
.endp remainder
ASM_SIZE_DIRECTIVE(remainder)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__remainder)
#endif
// Error stub shared by the special-case paths of remainder.
// Allocates a 64-byte stack frame, stores FR_X at sp+16, FR_Y at sp+32
// and FR_RESULT at sp+48, passes the three addresses (plus the tag set
// by the caller in GR_Parameter_TAG) to __libm_error_support, then
// reloads the result from sp+48 into f8 and returns through the
// original b0.
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp-32 = new sp+32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; pointer advances to sp+48
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
nop.b 0
}
{ .mib
stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp // Recompute result address after the call
};;
{ .mmi
ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -0,0 +1,611 @@
.file "remainderf.asm"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
// Software Lab,
// Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//====================================================================
// 2/02/00 Initial version
// 3/02/00 New algorithm
// 4/04/00 Unwind support added
// 7/21/00 Fixed quotient=2^{24*m+23} bug
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//11/29/00 Set FR_Y to f9
//
// API
//====================================================================
// float remainderf(float,float);
//
// Overview of operation
//====================================================================
// remainder(a,b)=a-i*b,
// where i is an integer such that, if b!=0 and a is finite,
// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
//
// Algorithm
//====================================================================
// a). eliminate special cases
// b). if |a/b|<0.25 (first quotient estimate), return a
// c). use single precision divide algorithm to get quotient q
// rounded to 24 bits of precision
// d). calculate partial remainders (using both q and q-ulp);
// select one and RZ(a/b) based on the sign of |a|-|b|*q
// e). if the exponent difference (exponent(a)-exponent(b))
// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
// and sticky bits to round to integer; exit loop and
// calculate final remainder
// f). if exponent(a)-exponent(b)>=24, select new value of a as
// the partial remainder calculated using RZ(a/b);
// repeat from c).
//
// Special cases
//====================================================================
// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
// a=NaN or b=NaN: return NaN
#include "libm_support.h"
//
// Registers used
//====================================================================
// Predicate registers: p6-p12
// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r40
// Floating point registers: f6-f15
//
.section .text
// Symbolic register names used by remainderf and by the
// __libm_error_region stub that follows it.
// General registers r33-r40 belong to the frame created by
// "alloc r32=ar.pfs,1,4,4,0"; r37-r40 are its four output registers.
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
// NOTE(review): GR_SAVE_SP is defined but not referenced in this file.
GR_SAVE_SP = r36
GR_Parameter_X = r37
GR_Parameter_Y = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
// Floating-point values that __libm_error_region spills to the stack
FR_X = f10
FR_Y = f9
FR_RESULT = f8
// float remainderf(float a, float b)
// Entry sequence: takes |a| and |b|, loads the loop-exit threshold,
// classifies both operands and branches away for special values.
// On fall-through into remloop24: f13=|a|, f14=|b|, f10=y0, f15=0,
// r3=bit pattern of 2^{24}-2, p7=1 (first pass), p6 from frcpa.
.proc remainderf#
.align 32
.global remainderf#
.align 32
remainderf:
#ifdef _LIBC
.global __remainderf
.type __remainderf,@function
__remainderf:
#endif
// inputs in f8, f9
// result in f8
{ .mfi
// register frame r32-r40; r32 receives the caller's ar.pfs
alloc r32=ar.pfs,1,4,4,0
// f13=|a|
fmerge.s f13=f0,f8
nop.i 0
}
{.mfi
nop.m 0
// f14=|b|
fmerge.s f14=f0,f9
nop.i 0;;
}
{.mlx
nop.m 0
// r3=2^{24}-2 (single precision bit pattern)
movl r3=0x4b7ffffe;;
}
// Y +-NAN, +-inf, +-0? p11
// (mask 0xe7 = mask 0xe3 shown below plus the zero bit)
{ .mfi
nop.m 999
(p0) fclass.m.unc p11,p0 = f9, 0xe7
nop.i 999
}
// fclass mask bits for 0xe3:
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999;;
}
{.mfi
nop.m 0
// f15=0: value of q0 if the (p6) steps in the loop are skipped
mov f15=f0
nop.i 0
}
{ .mfi
// set p7=1 (enables the first-pass |a/b|<1/4 test in the loop)
cmp.eq.unc p7,p0=r0,r0
// Step (1)
// y0 = 1 / b in f10
frcpa.s1 f10,p6=f13,f14
nop.i 0;;
}
{.bbb
// special operands are handled out of line
(p9) br.cond.spnt L(FREM_X_NAN_INF)
(p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
nop.b 0
} {.mfi
nop.m 0
// set D flag if a (f8) is denormal
fnma.s0 f6=f8,f1,f8
nop.i 0;;
}
// Main reduction loop. Each pass refines a quotient estimate of
// |a|/|b| (f13/f14) through Steps (2)-(9), forms partial remainders
// with q and q-ulp, and either loops with the partial remainder as the
// new |a| (p12 set) or falls through to the final rounding code.
// The |a/b|<1/4 early return is only taken on the first pass (p7).
.align 32
L(remloop24):
{ .mfi
// f12=2^{24}-2
setf.s f12=r3
// Step (2)
// q0 = a * y0 in f15
(p6) fma.s1 f15=f13,f10,f0
nop.i 0
}
{ .mfi
nop.m 0
// Step (3)
// e0 = 1 - b * y0 in f7
(p6) fnma.s1 f7=f14,f10,f1
nop.i 0;;
}
{.mlx
nop.m 0
// r2=1.25*2^{-24} (single precision bit pattern)
movl r2=0x33a00000;;
}
{ .mfi
nop.m 0
// Step (4)
// q1 = q0 + e0 * q0 in f6
(p6) fma.s1 f6=f7,f15,f15
nop.i 0
}
{ .mfi
nop.m 0
// Step (5)
// e1 = e0 * e0 in f7
(p6) fma.s1 f7=f7,f7,f0
nop.i 0;;
}
{.mii
// first pass only: r29=sign/exponent field of q0, r28=bias-2
(p7) getf.exp r29=f15
(p7) mov r28=0xfffd
nop.i 0;;
}
{ .mfi
// f15=1.25*2^{-24}
setf.s f15=r2
// Step (6)
// q2 = q1 + e1 * q1 in f6
(p6) fma.s1 f6=f7,f6,f6
nop.i 0
}
{ .mfi
// r2=0x3e7: start building the double precision constant for f7
mov r2=0x3e7
// Step (7)
// e2 = e1 * e1 in f7
(p6) fma.s1 f7=f7,f7,f0
nop.i 0;;
}
{.mmi
// q<1/4 ? (i.e. expon< -2)
// p7 stays set only if the first-pass test succeeds
(p7) cmp.gt.unc p7,p0=r28,r29
nop.m 0
// r2=0x3e7000000
shl r2=r2,24;;
}
{.mfb
// r2=0x3e7000001
add r2=1,r2
// if |a/b|<1/4, set D flag before returning
(p7) fma.s.s0 f9=f9,f0,f8
nop.b 0;;
}
{.mfb
nop.m 0
// can be combined with bundle above if sign of 0 or
// FTZ enabled are not important
(p7) fmerge.s f8=f8,f9
// return if |a|<|b|/4 (estimated quotient < 1/4)
(p7) br.ret.spnt b0;;
}
{.mfi
nop.m 0
// set f8 to current a value | sign
fmerge.s f8=f8,f13
// r2=2^{-24}+2^{-48} (double prec. bit pattern 0x3e70000010000000)
shl r2=r2,28;;
}
{ .mfi
// r29= -32+bias
mov r29=0xffdf
// Step (8)
// q3 = q2 + e2 * q2 in f6
(p6) fma.d.s1 f6=f7,f6,f6
nop.i 0;;
}
{ .mfi
nop.m 0
// Step (9)
// q = q3 in f11 (rounded to single precision)
(p6) fma.s.s1 f11=f6,f1,f0
nop.i 0;;
}
{.mfi
// f7=2^{-24}+2^{-48}
setf.d f7=r2
// last step ? (q3<2^{24}-2 --> q<2^{24}); p12=1 means another pass
fcmp.lt.unc.s1 p0,p12=f6,f12
nop.i 0
} {.mfi
// f12=2^{-32}
setf.exp f12=r29
nop.f 0
nop.i 0;;
}
{.mfi
nop.m 0
// r=a-b*q in f6
fnma.s1 f6=f14,f11,f13
nop.i 0
}
{.mfi
nop.m 0
// q'=q-q*(1.25*2^{-24}) (q'=q-ulp) in f15
fnma.s.s1 f15=f11,f15,f11
nop.i 0;;
}
{.mfi
nop.m 0
// second remainder=a-b*q' (kept in f13)
fnma.s1 f13=f14,f15,f13
nop.i 0;;
}
{.mfi
nop.m 0
// r>0 iff q=RZ(a/b) and inexact
fcmp.gt.unc.s1 p8,p0=f6,f0
nop.i 0
}
{.mfi
nop.m 0
// r<0 iff q'=RZ(a/b) and inexact
fcmp.lt.unc.s1 p9,p10=f6,f0
nop.i 0;;
}
.pred.rel "mutex",p8,p9
{.mfi
nop.m 0
// (p8) Q=q+(last iteration ? sticky bits:0)
// i.e. Q=q+q*x (x=2^{-32} or 0)
(p8) fma.s1 f11=f11,f12,f11
nop.i 0
}
{.mfi
nop.m 0
// (p9) Q=q'+(last iteration ? sticky bits:0)
// i.e. Q=q'+q'*x (x=2^{-24} or 0: if expon. difference=23, want to round back to q)
(p9) fma.s1 f11=f15,f7,f15
nop.i 0;;
}
{.mfb
nop.m 0
// (p9) the second remainder already in f13 is the new a
// (p10) new a = r (f6)
(p10) mov f13=f6
// not the last step: iterate with the reduced |a| in f13
(p12) br.cond.sptk L(remloop24);;
}
// last iteration
{.mfi
nop.m 0
// set f9=|b|*sgn(a)
fmerge.s f9=f8,f9
nop.i 0
}
{.mfi
nop.m 0
// round to integer
fcvt.fx.s1 f11=f11
nop.i 0;;
}
{.mfi
nop.m 0
// save sign of a
fmerge.s f7=f8,f8
nop.i 0
}
{.mfi
nop.m 0
// normalize (convert the integer back to floating point)
fcvt.xf f11=f11
nop.i 0;;
}
{.mfi
nop.m 0
// This can be removed if sign of 0 is not important
// get remainder using sf1 (copy in f12 is only used for the zero test)
fnma.s.s1 f12=f9,f11,f8
nop.i 0
}
{.mfi
nop.m 0
// get remainder (result in f8, computed with status field 0)
fnma.s.s0 f8=f9,f11,f8
nop.i 0;;
}
{.mfi
nop.m 0
// f12=0?
// This can be removed if sign of 0 is not important
fcmp.eq.unc.s1 p8,p0=f12,f0
nop.i 0;;
}
{.mfb
nop.m 0
// if f8=0, set sign correctly (sign of a, saved in f7)
// This can be removed if sign of 0 is not important
(p8) fmerge.s f8=f7,f8
// return
br.ret.sptk b0;;
}
// Reached when X (f8) is a NaN or an infinity (fclass mask 0xe3).
// Y zero is forwarded to FREM_Y_ZERO; X infinite produces a NaN result
// and goes through the error handler; otherwise a NaN is returned.
L(FREM_X_NAN_INF):
// Y zero ?
{.mfi
nop.m 0
// f10=Y*1+0, so the compare below operates on a copy of Y
fma.s1 f10=f9,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
nop.i 0;;
}
{.mib
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt L(FREM_Y_ZERO);;
}
// X infinity? Return QNAN indefinite
// p8 = X is +/-Inf (refined below to "and Y is not a NaN")
{ .mfi
nop.m 999
(p0) fclass.m.unc p8,p0 = f8, 0x23
nop.i 999
}
// Same test again into p11: p11 = X is +/-Inf (selects the error path)
{ .mfi
nop.m 999
(p0) fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
// second target is set when Y is NOT a NaN, so p8 = X Inf and Y not NaN
{.mfi
nop.m 999
(p8) fclass.m.unc p0,p8=f9,0xc3
nop.i 0;;
}
{.mfi
nop.m 999
// also set Denormal flag if necessary
(p8) fma.s0 f9=f9,f1,f0
nop.i 0
}
{ .mfi
nop.m 999
// X/X with X infinite: produces the NaN result in f8
(p8) frcpa.s0 f8,p7 = f8,f8
nop.i 999 ;;
}
{.mfi
nop.m 999
// f10 (FR_X) = f8 for the error handler
// NOTE(review): when p8 is set, f8 was already overwritten by the
// frcpa above, so FR_X receives that result, not the original X.
(p11) mov f10=f8
nop.i 0
}
{ .mfi
nop.m 999
(p8) fma.s f8=f8,f1,f0
nop.i 0 ;;
}
{ .mfb
nop.m 999
// executed on every path that reaches this point; result in f8
frcpa.s0 f8,p7=f8,f9
// X infinite: report through __libm_error_region
(p11) br.cond.spnt L(EXP_ERROR_RETURN);;
}
{ .mib
nop.m 0
nop.i 0
// X NaN: return f8 directly
br.ret.spnt b0 ;;
}
// Reached when Y (f9) is a NaN, an infinity or a zero (fclass mask 0xe7)
// and X did not take the FREM_X_NAN_INF branch.
// Y infinite: return X. Y NaN: return Y. Otherwise fall through to
// FREM_Y_ZERO.
L(FREM_Y_NAN_INF_ZERO):
// Y INF
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
// result = X (rounded to single)
(p7) fma.s f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
// result = Y (the NaN operand)
(p9) fma.s f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
// Y is zero. Builds the NaN result in f8, copies X into f10 (FR_X) and
// falls through into EXP_ERROR_RETURN.
L(FREM_Y_ZERO):
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
// X NAN?
// p9 = X is a NaN, p10 = X is not a NaN
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
// X not a NaN: p9 = X matches none of the classes in mask 0xff,
// p10 = X matches one of them
{ .mfi
nop.m 999
(p10) fclass.nm p9,p10 = f8, 0xff
nop.i 999 ;;
}
{.mfi
nop.m 999
// p9: f11 = result of X/0
(p9) frcpa f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
// p10: f11 = result of 0/0
(p10) frcpa f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
// f10 (FR_X) = X, read before f8 is overwritten below
(p0) fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
// result = f11 (rounded to single)
(p0) fma.s f8=f11,f1,f0
nop.i 999
}
// Common exit for the error cases: load the error tag into the fourth
// output register and branch (not call) to __libm_error_region, which
// returns to remainderf's caller through b0.
L(EXP_ERROR_RETURN):
{ .mib
// 125 is the tag handed to __libm_error_support for this function
// (presumably defined alongside libm_support.h -- confirm)
(p0) mov GR_Parameter_TAG = 125
nop.i 999
(p0) br.sptk __libm_error_region;;
}
.endp remainderf
ASM_SIZE_DIRECTIVE(remainderf)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__remainderf)
#endif
// Error stub shared by the special-case paths of remainderf.
// Allocates a 64-byte stack frame, stores FR_X at sp+16, FR_Y at sp+32
// and FR_RESULT at sp+48 (single precision), passes the three addresses
// (plus the tag set by the caller in GR_Parameter_TAG) to
// __libm_error_support, then reloads the result from sp+48 into f8 and
// returns through the original b0.
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp-32 = new sp+32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
stfs [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; pointer advances to sp+48
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
nop.b 0
}
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
br.call.sptk b0=__libm_error_support#;; // Call error handling function
}
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp // Recompute result address after the call
};;
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -0,0 +1,619 @@
.file "remainderl.asm"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
// Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//====================================================================
// 2/02/00 Initial version
// 3/02/00 New algorithm
// 4/04/00 Unwind support added
// 7/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//11/29/00 Set FR_Y to f9
//
// API
//====================================================================
// long double remainderl(long double,long double);
//
// Overview of operation
//====================================================================
// remainder(a,b)=a-i*b,
// where i is an integer such that, if b!=0 and a is finite,
// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
//
// Algorithm
//====================================================================
// a). eliminate special cases
// b). if |a/b|<0.25 (first quotient estimate), return a
// c). use single precision divide algorithm to get quotient q
// rounded to 24 bits of precision
// d). calculate partial remainders (using both q and q-ulp);
// select one and RZ(a/b) based on the sign of |a|-|b|*q
// e). if the exponent difference (exponent(a)-exponent(b))
// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
// and sticky bits to round to integer; exit loop and
// calculate final remainder
// f). if exponent(a)-exponent(b)>=24, select new value of a as
// the partial remainder calculated using RZ(a/b);
// repeat from c).
//
// Special cases
//====================================================================
// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
// a=NaN or b=NaN: return NaN
#include "libm_support.h"
//
// Registers used
//====================================================================
// Predicate registers: p6-p14
// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r40
// Floating point registers: f6-f15,f32
//
.section .text
// Symbolic register names used by remainderl and by the
// __libm_error_region stub that follows it.
// General registers r33-r40 belong to the frame created by
// "alloc r32=ar.pfs,1,4,4,0"; r37-r40 are its four output registers.
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
// NOTE(review): GR_SAVE_SP is defined but not referenced in this file.
GR_SAVE_SP = r36
GR_Parameter_X = r37
GR_Parameter_Y = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
// Floating-point values that __libm_error_region spills to the stack
FR_X = f10
FR_Y = f9
FR_RESULT = f8
// long double remainderl(long double a, long double b)
// Entry sequence: takes |a| and |b|, loads the constants used by the
// loop, classifies both operands (including a zero significand in b
// and operands that match no fclass class) and branches away for
// special values.
// On fall-through into remloop24: f13=|a|, f14=|b|, f10=y0, f12=0,
// f32=E, r3=bit pattern of 2^{23}, p7=1 (first pass), p6 from frcpa.
.proc remainderl#
.align 32
.global remainderl#
.align 32
remainderl:
#ifdef _LIBC
.global __remainderl
.type __remainderl,@function
__remainderl:
#endif
// inputs in f8, f9
// result in f8
{ .mfi
// register frame r32-r40; r32 receives the caller's ar.pfs
alloc r32=ar.pfs,1,4,4,0
// f13=|a|
fmerge.s f13=f0,f8
nop.i 0
}
{.mfi
// r29=significand of b (tested against 0 below)
getf.sig r29=f9
// f14=|b|
fmerge.s f14=f0,f9
nop.i 0;;
}
{.mlx
// r28=sign/exponent image of E (loaded into f32 by setf.exp below);
// 0x2ffdd = sign bit set, biased exponent 0xffdd (bias-34)
mov r28=0x2ffdd
// r3=2^{23} (single precision bit pattern)
movl r3=0x4b000000;;
}
{.mmi
// f32=E, the correction term added in Step (4)
setf.exp f32=r28
nop.m 0
// y pseudo-zero ? (significand of b is 0): p11 yes, p10 no
cmp.eq p11,p10=r29,r0;;
}
// Y +-NAN, +-inf, +-0? p11 (only tested when the significand is non-zero)
// (mask 0xe7 = mask 0xe3 shown below plus the zero bit)
{ .mfi
nop.m 999
(p10) fclass.m p11,p10 = f9, 0xe7
nop.i 999
}
// fclass mask bits for 0xe3:
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// X +-NAN, +-inf, ? p9 (p8 = X is none of these)
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p8 = f8, 0xe3
nop.i 999;;
}
{.mfi
nop.m 0
// f12=0: value of q0 if the (p6) steps in the loop are skipped
mov f12=f0
nop.i 0
}
{ .mfi
// set p7=1 (enables the first-pass |a/b|<1/4 test in the loop)
cmp.eq.unc p7,p0=r0,r0
// Step (1)
// y0 = 1 / b in f10
frcpa.s1 f10,p6=f13,f14
nop.i 0;;
}
// Y still unclassified (p10): does it match none of the classes in
// mask 0xff? If so also set p11.
{ .mfi
nop.m 999
// pseudo-NaN ?
(p10) fclass.nm p11,p0 = f9, 0xff
nop.i 999
}
// X not NaN/Inf (p8): does it match none of the classes in mask 0xff?
// If so also set p9.
{ .mfi
nop.m 999
(p8) fclass.nm p9,p0 = f8, 0xff
nop.i 999;;
}
{.bbb
// special operands are handled out of line
(p9) br.cond.spnt L(FREM_X_NAN_INF)
(p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
nop.b 0
} {.mfi
nop.m 0
// set D flag if a (f8) is denormal
fnma.s0 f6=f8,f1,f8
nop.i 0;;
}
// Main reduction loop. Each pass computes a 24-bit quotient estimate q
// of |a|/|b| (f13/f14), forms partial remainders with q and q-ulp, and
// either loops with the partial remainder as the new |a| (p12 set) or
// falls through to the final rounding code after the branch.
// The |a/b|<1/4 early return is only taken on the first pass (p7).
L(remloop24):
{ .mfi
nop.m 0
// Step (2)
// q0 = a * y0 in f12
(p6) fma.s1 f12=f13,f10,f0
nop.i 0
} { .mfi
nop.m 0
// Step (3)
// e0 = 1 - b * y0 in f7
(p6) fnma.s1 f7=f14,f10,f1
nop.i 0;;
} {.mlx
nop.m 0
// r2=1.25*2^{-24} (single precision bit pattern)
movl r2=0x33a00000;;
}
{.mfi
nop.m 0
// q1=q0*(1+e0) in f15
fma.s1 f15=f12,f7,f12
nop.i 0
}
{ .mfi
nop.m 0
// Step (4)
// e1 = e0 * e0 + E in f7
(p6) fma.s1 f7=f7,f7,f32
nop.i 0;;
}
{.mii
// first pass only: r29=sign/exponent field of q0, r28=bias-2
(p7) getf.exp r29=f12
(p7) mov r28=0xfffd
nop.i 0;;
}
{ .mfi
// f12=2^{23}
setf.s f12=r3
// Step (5)
// q2 = q1 + e1 * q1 in f11 (rounded to single precision)
(p6) fma.s.s1 f11=f7,f15,f15
nop.i 0
} { .mfi
nop.m 0
// Step (6)
// q2 = q1 + e1 * q1 in f6 (same product, without the .s rounding)
(p6) fma.s1 f6=f7,f15,f15
nop.i 0;;
}
{.mmi
// f15=1.25*2^{-24}
setf.s f15=r2
// q<1/4 ? (i.e. expon< -2)
// p7 stays set only if the first-pass test succeeds; on later
// passes p7 is already 0 and this compare does not execute
(p7) cmp.gt p7,p0=r28,r29
nop.i 0;;
}
{.mfb
// r29= -32+bias
mov r29=0xffdf
// if |a/b|<1/4, set D flag before returning
(p7) fma.s0 f9=f9,f0,f8
nop.b 0;;
}
{.mfb
nop.m 0
// can be combined with bundle above if sign of 0 or
// FTZ enabled are not important
(p7) fmerge.s f8=f8,f9
// return if |a|<|b|/4 (estimated quotient < 1/4)
(p7) br.ret.spnt b0;;
}
{.mfi
// f7=2^{-32}
setf.exp f7=r29
// set f8 to current a value | sign
fmerge.s f8=f8,f13
nop.i 0;;
}
{.mfi
// r28=sign/exponent field of the quotient estimate in f6
getf.exp r28=f6
// last step ? (q<2^{23}); p12=1 means another pass is needed
fcmp.lt.unc.s1 p0,p12=f6,f12
nop.i 0;;
}
{.mfi
nop.m 0
// r=a-b*q in f6
fnma.s1 f6=f14,f11,f13
nop.i 0
} {.mfi
// GR r2=23+bias
mov r2=0xffff+23
// q'=q-q*(1.25*2^{-24}) (q'=q-ulp) in f15
fnma.s.s1 f15=f11,f15,f11
nop.i 0;;
}
{.mmi
nop.m 0
// p11: exponent of quotient estimate is exactly 23; p14: otherwise
cmp.eq p11,p14=r2,r28
nop.i 0;;
}
.pred.rel "mutex",p11,p14
{.mfi
nop.m 0
// if exponent of q is 23, then second remainder=a-b*2^{23}
(p11) fnma.s1 f13=f12,f14,f13
nop.i 0
}
{.mfi
nop.m 0
// otherwise second remainder=a-b*q' (kept in f13)
(p14) fnma.s1 f13=f14,f15,f13
nop.i 0;;
}
{.mfi
nop.m 0
// r>0 iff q=RZ(a/b) and inexact
fcmp.gt.unc.s1 p8,p0=f6,f0
nop.i 0
} {.mfi
nop.m 0
// r<0 iff q'=RZ(a/b) and inexact
(p14) fcmp.lt.unc.s1 p9,p10=f6,f0
nop.i 0;;
}
.pred.rel "mutex",p8,p9
{.mfi
nop.m 0
// (p8) Q=q+(last iteration ? sticky bits:0)
// i.e. Q=q+q*x (x=2^{-32} or 0)
(p8) fma.s1 f11=f11,f7,f11
nop.i 0
} {.mfi
nop.m 0
// (p9) Q=q'+(last iteration ? sticky bits:0)
// i.e. Q=q'+q'*x (x=2^{-32} or 0)
(p9) fma.s1 f11=f15,f7,f15
nop.i 0;;
}
{.mfb
nop.m 0
// (p9) the second remainder already in f13 is the new a
// (p10) new a = r (f6)
(p10) mov f13=f6
// not the last step: iterate with the reduced |a| in f13
(p12) br.cond.sptk L(remloop24);;
}
// last iteration
{.mfi
nop.m 0
// set f9=|b|*sgn(a)
fmerge.s f9=f8,f9
nop.i 0
}
{.mfi
nop.m 0
// round to integer
fcvt.fx.s1 f11=f11
nop.i 0;;
}
{.mfi
nop.m 0
// save sign of a
fmerge.s f7=f8,f8
nop.i 0
} {.mfi
nop.m 0
// normalize (convert the integer back to floating point)
fcvt.xf f11=f11
nop.i 0;;
}
{.mfi
nop.m 0
// This can be removed if sign of 0 is not important
// get remainder using sf1 (copy in f12 is only used for the zero test)
fnma.s1 f12=f9,f11,f8
nop.i 0
}
{.mfi
nop.m 0
// get remainder (result in f8, computed with status field 0)
fnma.s0 f8=f9,f11,f8
nop.i 0;;
}
{.mfi
nop.m 0
// f12=0?
// This can be removed if sign of 0 is not important
fcmp.eq.unc.s1 p8,p0=f12,f0
nop.i 0;;
}
{.mfb
nop.m 0
// if f8=0, set sign correctly (sign of a, saved in f7)
// This can be removed if sign of 0 is not important
(p8) fmerge.s f8=f7,f8
// return
br.ret.sptk b0;;
}
// Reached when X (f8) is a NaN or an infinity (fclass mask 0xe3), or
// matches none of the classes in mask 0xff.
// Y zero is forwarded to FREM_Y_ZERO; X infinite produces a NaN result
// and goes through the error handler; otherwise f8 is returned after
// the final frcpa below.
L(FREM_X_NAN_INF):
// Y zero ?
{.mfi
nop.m 0
// f10=Y*1+0, so the compare below operates on a copy of Y
fma.s1 f10=f9,f1,f0
nop.i 0;;
}
{.mfi
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
nop.i 0;;
}
{.mib
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt L(FREM_Y_ZERO);;
}
// X infinity? Return QNAN indefinite
// p8 = X is +/-Inf (refined below to "and Y is not a NaN")
{ .mfi
nop.m 999
(p0) fclass.m.unc p8,p0 = f8, 0x23
nop.i 999
}
// Same test again into p11: p11 = X is +/-Inf (selects the error path)
{ .mfi
nop.m 999
(p0) fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
// second target is set when Y is NOT a NaN, so p8 = X Inf and Y not NaN
{.mfi
nop.m 999
(p8) fclass.m.unc p0,p8=f9,0xc3
nop.i 0;;
}
{.mfi
nop.m 999
// also set Denormal flag if necessary
(p8) fnma.s0 f9=f9,f1,f9
nop.i 0
}
{ .mfi
nop.m 999
// X/X with X infinite: produces the NaN result in f8
(p8) frcpa.s0 f8,p7 = f8,f8
nop.i 999 ;;
}
{.mfi
nop.m 999
// f10 (FR_X) = f8 for the error handler
// NOTE(review): when p8 is set, f8 was already overwritten by the
// frcpa above, so FR_X receives that result, not the original X.
(p11) mov f10=f8
nop.i 0
}
{ .mfi
nop.m 999
(p8) fma f8=f8,f1,f0
nop.i 0 ;;
}
{ .mfb
nop.m 999
// executed on every path that reaches this point; result in f8
frcpa.s0 f8,p7=f8,f9
// X infinite: report through __libm_error_region
(p11) br.cond.spnt L(EXP_ERROR_RETURN);;
}
{ .mib
nop.m 0
nop.i 0
// otherwise return f8 directly
br.ret.spnt b0 ;;
}
// Reached when Y (f9) has a zero significand, is a NaN, an infinity or
// a zero (fclass mask 0xe7), or matches none of the classes in mask
// 0xff, and X did not take the FREM_X_NAN_INF branch.
// Y infinite: return X. Y NaN or unclassifiable: return Y.
// Otherwise fall through to FREM_Y_ZERO.
L(FREM_Y_NAN_INF_ZERO):
// Y INF
{ .mfi
nop.m 999
(p0) fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
// result = X
(p7) fma f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN? p9 = yes, p10 = no
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f9, 0xc3
nop.i 999 ;;
}
// not a NaN: also set p9 if Y matches none of the classes in mask 0xff
{ .mfi
nop.m 999
(p10) fclass.nm p9,p0 = f9, 0xff
nop.i 999 ;;
}
{ .mfb
nop.m 999
// result = Y*1+0
(p9) fma f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
// Y is zero. Builds the NaN result in f8, copies X into f10 (FR_X) and
// falls through into EXP_ERROR_RETURN.
L(FREM_Y_ZERO):
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
// X NAN?
// p9 = X is a NaN, p10 = X is not a NaN
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
// X not a NaN: p9 = X matches none of the classes in mask 0xff,
// p10 = X matches one of them
{ .mfi
nop.m 999
(p10) fclass.nm p9,p10 = f8, 0xff
nop.i 999 ;;
}
{.mfi
nop.m 999
// p9: f11 = result of X/0
(p9) frcpa f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
// p10: f11 = result of 0/0
(p10) frcpa f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
// f10 (FR_X) = X, read before f8 is overwritten below
(p0) fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
// result = f11
(p0) fma f8=f11,f1,f0
nop.i 999;;
}
// Common exit for the error cases: load the error tag into the fourth
// output register and branch (not call) to __libm_error_region, which
// returns to remainderl's caller through b0.
L(EXP_ERROR_RETURN):
{ .mib
// 123 is the tag handed to __libm_error_support for this function
// (presumably defined alongside libm_support.h -- confirm)
(p0) mov GR_Parameter_TAG = 123
nop.i 999
(p0) br.sptk __libm_error_region;;
}
.endp remainderl
ASM_SIZE_DIRECTIVE(remainderl)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__remainderl)
#endif
// Error stub shared by the special-case paths of remainderl.
// Allocates a 64-byte stack frame, stores FR_X at sp+16, FR_Y at sp+32
// and FR_RESULT at sp+48 (extended precision, stfe), passes the three
// addresses (plus the tag set by the caller in GR_Parameter_TAG) to
// __libm_error_support, then reloads the result from sp+48 into f8 and
// returns through the original b0.
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp-32 = new sp+32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; pointer advances to sp+48
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
nop.b 0
}
{ .mib
stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp // Recompute result address after the call
};;
{ .mmi
ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

551
sysdeps/ia64/fpu/e_scalb.S Normal file
View File

@ -0,0 +1,551 @@
.file "scalb.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00 Initial version
// 1/26/01 Scalb completely reworked and now standalone version
//
// API
//==============================================================
// double = scalb (double x, double n)
// input floating point f8 and floating point f9
// output floating point f8
//
// Returns x* 2**n using an fma and detects overflow
// and underflow.
//
//
#include "libm_support.h"
//
// Register aliases used by scalb and its error region.
//
// Floating-point: incoming arguments
FR_Floating_X = f8
FR_Floating_N = f9
// Floating-point: results (f8 and f9 are shared with the arguments)
FR_Result = f8
FR_Result2 = f9
FR_Result3 = f11
// Floating-point: scale-factor limits and working values
FR_Big = f6
FR_NBig = f7
FR_Norm_N = f10
FR_Norm_X = f12
FR_N_float_int = f13
FR_Two_N = f14
FR_Two_to_Big = f15
// General registers: stacked locals (see the alloc in scalb)
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
GR_SAVE_PFS = r34
// General registers: stacked outputs handed to __libm_error_support
GR_Parameter_X = r35
GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
// General registers: static registers used as temporaries
GR_N_as_int = r21
GR_Bias = r20
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
GR_Scratch = r18
GR_Scratch1 = r19
//
// scalb(x, n): returns x * 2**n for double-precision arguments.
// x arrives in f8 and n in f9; the result is returned in f8.
// Under _LIBC the same entry point is also exported as __ieee754_scalb.
//
// Outline:
//   1. x or n is NaN/Inf/zero  -> L(SCALB_NAN_INF_ZERO)
//   2. n is not an integer     -> return frcpa(0/0)
//   3. otherwise build 2**n (clamped for very large |n|), multiply,
//      and branch to the error region on overflow or underflow.
//
.align 32
.global scalb
.section .text
.proc scalb
.align 32
scalb:
#ifdef _LIBC
.global __ieee754_scalb
.type __ieee754_scalb,@function
__ieee754_scalb:
#endif
//
// Is x NAN, INF, ZERO, +-?  (p7 set if so)
// The alloc requests 0 inputs, 3 locals (r32-r34) and 4 outputs (r35-r38).
// GR_Scratch = 0x19C3F is the exponent bias 0xFFFF (see GR_Bias) + 40000.
//
{ .mfi
alloc r32=ar.pfs,0,3,4,0
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Scratch = 0x019C3F,r0
}
//
// Is N a NAN, INF, ZERO, +-?  (p6 set if so)
// GR_Scratch1 = 0x063BF is the exponent bias 0xFFFF - 40000.
//
{ .mfi
nop.m 999
fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Scratch1 = 0x063BF,r0
}
;;
//
// Normalize N
// Normalize x
//
{ .mfi
nop.m 0
fnorm.s1 FR_Norm_N = FR_Floating_N
nop.i 999
}
{ .mfi
nop.m 999
fnorm.s1 FR_Norm_X = FR_Floating_X
nop.i 999
};;
//
// Create 2**big  (FR_Big,  from the biased exponent in GR_Scratch)
// Create 2**-big (FR_NBig, from the biased exponent in GR_Scratch1)
// Branch on special values.
//
{ .mib
setf.exp FR_Big = GR_Scratch
nop.i 0
(p6) br.cond.spnt L(SCALB_NAN_INF_ZERO)
}
{ .mib
setf.exp FR_NBig = GR_Scratch1
nop.i 0
(p7) br.cond.spnt L(SCALB_NAN_INF_ZERO)
};;
//
// Convert N to a fp integer (truncating)
// Create -35000
// GR_Scratch = 1 is shifted to 1 << 63 in the next group.
//
{ .mfi
addl GR_Scratch = 1,r0
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
addl GR_NBig = -35000,r0
}
;;
//
// Put N in a GP register
// Convert N_float_int back to a floating point value
// Create 35000
// Build the exponent Bias
//
{ .mii
getf.sig GR_N_as_int = FR_N_float_int
shl GR_Scratch = GR_Scratch,63
addl GR_Big = 35000,r0
}
{ .mfi
addl GR_Bias = 0x0FFFF,r0
fcvt.xf FR_N_float_int = FR_N_float_int
nop.i 0
};;
//
// Catch those fp values that are beyond 2**64-1:
//   p10 is set when the converted N equals 1 << 63, p9 otherwise
//   (1 << 63 is presumably the integer indefinite that fcvt.fx yields
//   for out-of-range input - confirm against the architecture manual).
// Is N >= 35000   (p6)
// Is N <= -35000  (p8)
//
{ .mfi
cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
nop.f 0
nop.i 0
}
{ .mmi
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
nop.i 0
};;
//
// Is N really an int?  Only checked (p9) when the conversion did not
// produce 1 << 63; p7 is set when N differs from its truncated value.
// Add the exponent bias to N.
//
{ .mfi
add GR_N_Biased = GR_Bias,GR_N_as_int
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
nop.i 0
};;
//
// Main path, create 2**N
// Branch and return if N is not an int (p7): the value returned in f8
// is frcpa(0/0).
//
{ .mfi
setf.exp FR_Two_N = GR_N_Biased
nop.i 999
}
{ .mfb
nop.m 0
(p7) frcpa f8,p11 = f0,f0
(p7) br.ret.spnt b0
};;
//
// When the integer conversion of N hit 1 << 63 (p10), redo the
// large/small decision from the sign of N: p6 if N >= 0, p8 otherwise.
//
{ .mfi
nop.m 999
(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
nop.i 0
};;
//
// Set denormal on denormal input x and denormal input N.
// These compares are issued under s0 (the user status field); their
// predicate results are not consumed later in this routine.
//
{ .mfi
nop.m 999
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
nop.i 999
}
{ .mfi
nop.m 999
fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
nop.i 0
};;
//
// Adjust 2**N if N was very small or very large:
//   p6 -> use FR_Big, p8 -> use FR_NBig.
// Meanwhile load the biased exponents used for the range checks:
//   0x303FF -> -2**1024 (bit 17 of the setf.exp operand is the sign)
//   0x103FF -> +2**1024
//
{ .mfi
nop.m 0
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch = 0x00000000000303FF
};;
{ .mfi
nop.m 0
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch1= 0x00000000000103FF
};;
// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
nop.m 999
(p0) fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
(p0) fsetc.s2 0x7F,0x42
nop.i 999
};;
//
// Do final operation: the same product x * 2**N is computed three times,
// once per status field (s0 -> FR_Result, s3 -> FR_Result3,
// s2 -> FR_Result2).  FR_NBig / FR_Big are reloaded with the
// range-check limits built above.
//
{ .mfi
setf.exp FR_NBig = GR_Scratch
fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
{ .mfi
setf.exp FR_Big = GR_Scratch1
fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
// Check for overflow or underflow.
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflow)
// S3 user supplied status + FZ + TD (Underflow)
//
//
// Restore s3
// Restore s2
//
{ .mfi
nop.m 0
fsetc.s3 0x7F,0x40
nop.i 999
}
{ .mfi
nop.m 0
fsetc.s2 0x7F,0x40
nop.i 999
};;
//
// Is the s3 result zero?  (p6)
// Is the s2 result >= FR_Big?  (p7 yes, p8 no)
// GR_Tag defaults to 53, the tag passed on the overflow path.
//
{ .mfi
nop.m 999
fclass.m.unc p6, p0 = FR_Result3, 0x007
nop.i 999
}
{ .mfi
addl GR_Tag = 53, r0
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
nop.i 0
};;
//
// Detect masked underflow - Tiny + Inexact Only:
// p6 stays set only if the s0 result differs from the s2 result.
//
{ .mfi
nop.m 999
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
nop.i 999
};;
//
// Is the result beyond the allowed range on the negative side?
//   (p9 set when FR_Result2 <= FR_NBig, only tested when p8)
// Branch out for underflow with GR_Tag = 54
//
{ .mfb
(p6) addl GR_Tag = 54, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt L(SCALB_UNDERFLOW)
};;
//
// Branch out for overflow (positive: p7, negative: p9)
//
{ .mbb
nop.m 0
(p7) br.cond.spnt L(SCALB_OVERFLOW)
(p9) br.cond.spnt L(SCALB_OVERFLOW)
};;
//
// Return from main path.
//
{ .mfb
nop.m 999
nop.f 0
br.ret.sptk b0;;
}
//
// Special-value path: reached when x or N is NaN, Inf or zero.
//
L(SCALB_NAN_INF_ZERO):
//
// Convert N to a fp integer
// Is N a NaN?  (p6)
//
{ .mfi
addl GR_Scratch = 1,r0
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
nop.i 999
}
{ .mfi
nop.m 0
fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
nop.i 0
};;
//
// Is x a NaN?  (p7)
// GR_Scratch becomes 1 << 63 for the integer check further down.
//
{ .mfi
nop.m 0
fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
shl GR_Scratch = GR_Scratch,63
};;
//
// Is N +Inf (p8) or -Inf (p9)?
//
{ .mfi
nop.m 0
fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
nop.i 0
}
{ .mfi
nop.m 0
fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
nop.i 0
};;
//
// Either X or N is a NaN: return N * x and possibly raise invalid.
//
{ .mfb
nop.m 0
(p6) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p6) br.ret.spnt b0
};;
{ .mfb
getf.sig GR_N_as_int = FR_N_float_int
(p7) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p7) br.ret.spnt b0
};;
//
// If N == +Inf, return x * N
// If N == -Inf, negate N first (fnma) so the divide below uses -N
//
{ .mfb
nop.m 0
(p8) fma.d.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
(p8) br.ret.spnt b0
}
{ .mfi
nop.m 0
(p9) fnma.d.s0 FR_Floating_N = FR_Floating_N, f1, f0
nop.i 0
};;
//
// If N==-Inf,return x/(-N)
//
{ .mfb
nop.m 0
(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
(p9) br.ret.spnt b0
};;
//
// Convert N_float_int to floating point value
// p9 is set when the integer form of N is not 1 << 63.
//
{ .mfi
cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
fcvt.xf FR_N_float_int = FR_N_float_int
nop.i 0
};;
//
// Is N an integer?  (p7 set when it is not)
//
{ .mfi
nop.m 0
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
nop.i 0
};;
//
// If N not an int, return NaN and raise invalid.
//
{ .mfb
nop.m 0
(p7) frcpa.s0 FR_Result,p6 = f0,f0
(p7) br.ret.spnt b0
};;
//
// Always return x in other path.
//
{ .mfb
nop.m 0
fma.d.s0 FR_Result = FR_Floating_X,f1,f0
br.ret.sptk b0
};;
.endp scalb
ASM_SIZE_DIRECTIVE(scalb)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__ieee754_scalb)
#endif
//
// Error exit shared by the overflow and underflow paths of scalb.
// Builds a 64-byte stack frame holding N (sp+32), x (sp+16) and the
// computed result (sp+48), passes their addresses in GR_Parameter_Y,
// GR_Parameter_X and GR_Parameter_RESULT to __libm_error_support
// (GR_Tag was set before the branch here), then reloads the result
// from the frame into FR_Result and returns to scalb's caller.
//
.proc __libm_error_region
__libm_error_region:
L(SCALB_OVERFLOW):
L(SCALB_UNDERFLOW):
//
// Get stack address of N (old sp - 32, i.e. new sp + 32)
// Save ar.pfs
//
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs
}
//
// Adjust sp to create the 64-byte frame
// Save gp
//
{ .mfi
.fframe 64
add sp=-64,sp
nop.f 0
mov GR_SAVE_GP=gp
};;
//
// Store N on stack in correct position; the post-increment leaves
// GR_Parameter_Y pointing at the result slot (sp + 48)
// Locate the address of x on stack
// Save b0
//
{ .mmi
stfd [GR_Parameter_Y] = FR_Norm_N,16
add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0
};;
//
// Store x on the stack.
// Get address for result on stack.
//
.body
{ .mib
stfd [GR_Parameter_X] = FR_Norm_X
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
//
// Store the computed result in its slot, move GR_Parameter_Y back to
// the slot holding N, and call the error handler.
//
{ .mib
stfd [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack (recomputed after the call)
//
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
//
// Get the new result
// Release the frame and restore the return address
//
{ .mmi
ldfd FR_Result = [GR_Parameter_RESULT]
.restore sp
add sp = 64,sp
mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
mov gp = GR_SAVE_GP
mov ar.pfs = GR_SAVE_PFS
br.ret.sptk b0
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// The error handler itself is defined elsewhere.
.type __libm_error_support#,@function
.global __libm_error_support#

551
sysdeps/ia64/fpu/e_scalbf.S Normal file
View File

@ -0,0 +1,551 @@
.file "scalbf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00 Initial version
// 1/26/01 Scalb completely reworked and now standalone version
//
// API
//==============================================================
// float = scalbf (float x, float n)
// input floating point f8 and floating point f9
// output floating point f8
//
// Returns x* 2**n using an fma and detects overflow
// and underflow.
//
//
#include "libm_support.h"
//
// Register aliases used by scalbf and its error region.
//
// Floating-point: incoming arguments
FR_Floating_X = f8
FR_Floating_N = f9
// Floating-point: results (f8 and f9 are shared with the arguments)
FR_Result = f8
FR_Result2 = f9
FR_Result3 = f11
// Floating-point: scale-factor limits and working values
FR_Big = f6
FR_NBig = f7
FR_Norm_N = f10
FR_Norm_X = f12
FR_N_float_int = f13
FR_Two_N = f14
FR_Two_to_Big = f15
// General registers: stacked locals (see the alloc in scalbf)
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
GR_SAVE_PFS = r34
// General registers: stacked outputs handed to __libm_error_support
GR_Parameter_X = r35
GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
// General registers: static registers used as temporaries
GR_N_as_int = r21
GR_Bias = r20
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
GR_Scratch = r18
GR_Scratch1 = r19
//
// scalbf(x, n): returns x * 2**n for single-precision arguments.
// x arrives in f8 and n in f9; the result is returned in f8.
// Under _LIBC the same entry point is also exported as __ieee754_scalbf.
//
// Outline:
//   1. x or n is NaN/Inf/zero  -> L(SCALBF_NAN_INF_ZERO)
//   2. n is not an integer     -> return frcpa(0/0)
//   3. otherwise build 2**n (clamped for very large |n|), multiply,
//      and branch to the error region on overflow or underflow.
//
.align 32
.global scalbf
.section .text
.proc scalbf
.align 32
scalbf:
#ifdef _LIBC
.global __ieee754_scalbf
.type __ieee754_scalbf,@function
__ieee754_scalbf:
#endif
//
// Is x NAN, INF, ZERO, +-?  (p7 set if so)
// The alloc requests 0 inputs, 3 locals (r32-r34) and 4 outputs (r35-r38).
// GR_Scratch = 0x19C3F is the exponent bias 0xFFFF (see GR_Bias) + 40000.
//
{ .mfi
alloc r32=ar.pfs,0,3,4,0
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Scratch = 0x019C3F,r0
}
//
// Is N a NAN, INF, ZERO, +-?  (p6 set if so)
// GR_Scratch1 = 0x063BF is the exponent bias 0xFFFF - 40000.
//
{ .mfi
nop.m 999
fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Scratch1 = 0x063BF,r0
}
;;
//
// Normalize N
// Normalize x
//
{ .mfi
nop.m 0
fnorm.s1 FR_Norm_N = FR_Floating_N
nop.i 999
}
{ .mfi
nop.m 999
fnorm.s1 FR_Norm_X = FR_Floating_X
nop.i 999
};;
//
// Create 2**big  (FR_Big,  from the biased exponent in GR_Scratch)
// Create 2**-big (FR_NBig, from the biased exponent in GR_Scratch1)
// Branch on special values.
//
{ .mib
setf.exp FR_Big = GR_Scratch
nop.i 0
(p6) br.cond.spnt L(SCALBF_NAN_INF_ZERO)
}
{ .mib
setf.exp FR_NBig = GR_Scratch1
nop.i 0
(p7) br.cond.spnt L(SCALBF_NAN_INF_ZERO)
};;
//
// Convert N to a fp integer (truncating)
// Create -35000
// GR_Scratch = 1 is shifted to 1 << 63 in the next group.
//
{ .mfi
addl GR_Scratch = 1,r0
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
addl GR_NBig = -35000,r0
}
;;
//
// Put N in a GP register
// Convert N_float_int back to a floating point value
// Create 35000
// Build the exponent Bias
//
{ .mii
getf.sig GR_N_as_int = FR_N_float_int
shl GR_Scratch = GR_Scratch,63
addl GR_Big = 35000,r0
}
{ .mfi
addl GR_Bias = 0x0FFFF,r0
fcvt.xf FR_N_float_int = FR_N_float_int
nop.i 0
};;
//
// Catch those fp values that are beyond 2**64-1:
//   p10 is set when the converted N equals 1 << 63, p9 otherwise
//   (1 << 63 is presumably the integer indefinite that fcvt.fx yields
//   for out-of-range input - confirm against the architecture manual).
// Is N >= 35000   (p6)
// Is N <= -35000  (p8)
//
{ .mfi
cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
nop.f 0
nop.i 0
}
{ .mmi
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
nop.i 0
};;
//
// Is N really an int?  Only checked (p9) when the conversion did not
// produce 1 << 63; p7 is set when N differs from its truncated value.
// Add the exponent bias to N.
//
{ .mfi
add GR_N_Biased = GR_Bias,GR_N_as_int
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
nop.i 0
};;
//
// Main path, create 2**N
// Branch and return if N is not an int (p7): the value returned in f8
// is frcpa(0/0).
//
{ .mfi
setf.exp FR_Two_N = GR_N_Biased
nop.i 999
}
{ .mfb
nop.m 0
(p7) frcpa f8,p11 = f0,f0
(p7) br.ret.spnt b0
};;
//
// When the integer conversion of N hit 1 << 63 (p10), redo the
// large/small decision from the sign of N: p6 if N >= 0, p8 otherwise.
//
{ .mfi
nop.m 999
(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
nop.i 0
};;
//
// Set denormal on denormal input x and denormal input N.
// These compares are issued under s0 (the user status field); their
// predicate results are not consumed later in this routine.
//
{ .mfi
nop.m 999
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
nop.i 999
}
{ .mfi
nop.m 999
fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
nop.i 0
};;
//
// Adjust 2**N if N was very small or very large:
//   p6 -> use FR_Big, p8 -> use FR_NBig.
// Meanwhile load the biased exponents used for the range checks:
//   0x3007F -> -2**128 (bit 17 of the setf.exp operand is the sign)
//   0x1007F -> +2**128
//
{ .mfi
nop.m 0
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch = 0x000000000003007F
};;
{ .mfi
nop.m 0
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch1= 0x000000000001007F
};;
// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
nop.m 999
(p0) fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
(p0) fsetc.s2 0x7F,0x42
nop.i 999
};;
//
// Do final operation: the same product x * 2**N is computed three times,
// once per status field (s0 -> FR_Result, s3 -> FR_Result3,
// s2 -> FR_Result2).  FR_NBig / FR_Big are reloaded with the
// range-check limits built above.
//
{ .mfi
setf.exp FR_NBig = GR_Scratch
fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
{ .mfi
setf.exp FR_Big = GR_Scratch1
fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
// Check for overflow or underflow.
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflow)
// S3 user supplied status + FZ + TD (Underflow)
//
//
// Restore s3
// Restore s2
//
{ .mfi
nop.m 0
fsetc.s3 0x7F,0x40
nop.i 999
}
{ .mfi
nop.m 0
fsetc.s2 0x7F,0x40
nop.i 999
};;
//
// Is the s3 result zero?  (p6)
// Is the s2 result >= FR_Big?  (p7 yes, p8 no)
// GR_Tag defaults to 55, the tag passed on the overflow path.
//
{ .mfi
nop.m 999
fclass.m.unc p6, p0 = FR_Result3, 0x007
nop.i 999
}
{ .mfi
addl GR_Tag = 55, r0
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
nop.i 0
};;
//
// Detect masked underflow - Tiny + Inexact Only:
// p6 stays set only if the s0 result differs from the s2 result.
//
{ .mfi
nop.m 999
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
nop.i 999
};;
//
// Is the result beyond the allowed range on the negative side?
//   (p9 set when FR_Result2 <= FR_NBig, only tested when p8)
// Branch out for underflow with GR_Tag = 56
//
{ .mfb
(p6) addl GR_Tag = 56, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt L(SCALBF_UNDERFLOW)
};;
//
// Branch out for overflow (positive: p7, negative: p9)
//
{ .mbb
nop.m 0
(p7) br.cond.spnt L(SCALBF_OVERFLOW)
(p9) br.cond.spnt L(SCALBF_OVERFLOW)
};;
//
// Return from main path.
//
{ .mfb
nop.m 999
nop.f 0
br.ret.sptk b0;;
}
//
// Special-value path: reached when x or N is NaN, Inf or zero.
//
L(SCALBF_NAN_INF_ZERO):
//
// Convert N to a fp integer
// Is N a NaN?  (p6)
//
{ .mfi
addl GR_Scratch = 1,r0
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
nop.i 999
}
{ .mfi
nop.m 0
fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
nop.i 0
};;
//
// Is x a NaN?  (p7)
// GR_Scratch becomes 1 << 63 for the integer check further down.
//
{ .mfi
nop.m 0
fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
shl GR_Scratch = GR_Scratch,63
};;
//
// Is N +Inf (p8) or -Inf (p9)?
//
{ .mfi
nop.m 0
fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
nop.i 0
}
{ .mfi
nop.m 0
fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
nop.i 0
};;
//
// Either X or N is a NaN: return N * x and possibly raise invalid.
//
{ .mfb
nop.m 0
(p6) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p6) br.ret.spnt b0
};;
{ .mfb
getf.sig GR_N_as_int = FR_N_float_int
(p7) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p7) br.ret.spnt b0
};;
//
// If N == +Inf, return x * N
// If N == -Inf, negate N first (fnma) so the divide below uses -N
//
{ .mfb
nop.m 0
(p8) fma.s.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
(p8) br.ret.spnt b0
}
{ .mfi
nop.m 0
(p9) fnma.s.s0 FR_Floating_N = FR_Floating_N, f1, f0
nop.i 0
};;
//
// If N==-Inf,return x/(-N)
//
{ .mfb
nop.m 0
(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
(p9) br.ret.spnt b0
};;
//
// Convert N_float_int to floating point value
// p9 is set when the integer form of N is not 1 << 63.
//
{ .mfi
cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
fcvt.xf FR_N_float_int = FR_N_float_int
nop.i 0
};;
//
// Is N an integer?  (p7 set when it is not)
//
{ .mfi
nop.m 0
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
nop.i 0
};;
//
// If N not an int, return NaN and raise invalid.
//
{ .mfb
nop.m 0
(p7) frcpa.s0 FR_Result,p6 = f0,f0
(p7) br.ret.spnt b0
};;
//
// Always return x in other path.
//
{ .mfb
nop.m 0
fma.s.s0 FR_Result = FR_Floating_X,f1,f0
br.ret.sptk b0
};;
.endp scalbf
ASM_SIZE_DIRECTIVE(scalbf)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__ieee754_scalbf)
#endif
//
// Error exit shared by the overflow and underflow paths of scalbf.
// Builds a 64-byte stack frame holding N (sp+32), x (sp+16) and the
// computed result (sp+48), passes their addresses in GR_Parameter_Y,
// GR_Parameter_X and GR_Parameter_RESULT to __libm_error_support
// (GR_Tag was set before the branch here), then reloads the result
// from the frame into FR_Result and returns to scalbf's caller.
//
.proc __libm_error_region
__libm_error_region:
L(SCALBF_OVERFLOW):
L(SCALBF_UNDERFLOW):
//
// Get stack address of N (old sp - 32, i.e. new sp + 32)
// Save ar.pfs
//
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs
}
//
// Adjust sp to create the 64-byte frame
// Save gp
//
{ .mfi
.fframe 64
add sp=-64,sp
nop.f 0
mov GR_SAVE_GP=gp
};;
//
// Store N on stack in correct position; the post-increment leaves
// GR_Parameter_Y pointing at the result slot (sp + 48)
// Locate the address of x on stack
// Save b0
//
{ .mmi
stfs [GR_Parameter_Y] = FR_Norm_N,16
add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0
};;
//
// Store x on the stack.
// Get address for result on stack.
//
.body
{ .mib
stfs [GR_Parameter_X] = FR_Norm_X
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
//
// Store the computed result in its slot, move GR_Parameter_Y back to
// the slot holding N, and call the error handler.
//
{ .mib
stfs [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack (recomputed after the call)
//
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
//
// Get the new result
// Release the frame and restore the return address
//
{ .mmi
ldfs FR_Result = [GR_Parameter_RESULT]
.restore sp
add sp = 64,sp
mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
mov gp = GR_SAVE_GP
mov ar.pfs = GR_SAVE_PFS
br.ret.sptk b0
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// The error handler itself is defined elsewhere.
.type __libm_error_support#,@function
.global __libm_error_support#

551
sysdeps/ia64/fpu/e_scalbl.S Normal file
View File

@ -0,0 +1,551 @@
.file "scalbl.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00 Initial version
// 1/26/01 Scalb completely reworked and now standalone version
//
// API
//==============================================================
// double-extended = scalbl (double-extended x, double-extended n)
// input floating point f8 and floating point f9
// output floating point f8
//
// Returns x* 2**n using an fma and detects overflow
// and underflow.
//
//
#include "libm_support.h"
//
// Register aliases used by scalbl and its error region.
//
// Floating-point: incoming arguments
FR_Floating_X = f8
FR_Floating_N = f9
// Floating-point: results (f8 and f9 are shared with the arguments)
FR_Result = f8
FR_Result2 = f9
FR_Result3 = f11
// Floating-point: scale-factor limits and working values
FR_Big = f6
FR_NBig = f7
FR_Norm_N = f10
FR_Norm_X = f12
FR_N_float_int = f13
FR_Two_N = f14
FR_Two_to_Big = f15
// General registers: stacked locals (see the alloc in scalbl)
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
GR_SAVE_PFS = r34
// General registers: stacked outputs handed to __libm_error_support
GR_Parameter_X = r35
GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
// General registers: static registers used as temporaries
GR_N_as_int = r21
GR_Bias = r20
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
GR_Scratch = r18
GR_Scratch1 = r19
//
// scalbl(x, n): returns x * 2**n for double-extended arguments.
// x arrives in f8 and n in f9; the result is returned in f8.
// Under _LIBC the same entry point is also exported as __ieee754_scalbl.
//
// Outline:
//   1. x or n is NaN/Inf/zero  -> L(SCALBL_NAN_INF_ZERO)
//   2. n is not an integer     -> return frcpa(0/0)
//   3. otherwise build 2**n (clamped for very large |n|), multiply,
//      and branch to the error region on overflow or underflow.
//
.align 32
.global scalbl
.section .text
.proc scalbl
.align 32
scalbl:
#ifdef _LIBC
.global __ieee754_scalbl
.type __ieee754_scalbl,@function
__ieee754_scalbl:
#endif
//
// Is x NAN, INF, ZERO, +-?  (p7 set if so)
// The alloc requests 0 inputs, 3 locals (r32-r34) and 4 outputs (r35-r38).
// GR_Scratch = 0x19C3F is the exponent bias 0xFFFF (see GR_Bias) + 40000.
//
{ .mfi
alloc r32=ar.pfs,0,3,4,0
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Scratch = 0x019C3F,r0
}
//
// Is N a NAN, INF, ZERO, +-?  (p6 set if so)
// GR_Scratch1 = 0x063BF is the exponent bias 0xFFFF - 40000.
//
{ .mfi
nop.m 999
fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Scratch1 = 0x063BF,r0
}
;;
//
// Normalize N
// Normalize x
//
{ .mfi
nop.m 0
fnorm.s1 FR_Norm_N = FR_Floating_N
nop.i 999
}
{ .mfi
nop.m 999
fnorm.s1 FR_Norm_X = FR_Floating_X
nop.i 999
};;
//
// Create 2**big  (FR_Big,  from the biased exponent in GR_Scratch)
// Create 2**-big (FR_NBig, from the biased exponent in GR_Scratch1)
// Branch on special values.
//
{ .mib
setf.exp FR_Big = GR_Scratch
nop.i 0
(p6) br.cond.spnt L(SCALBL_NAN_INF_ZERO)
}
{ .mib
setf.exp FR_NBig = GR_Scratch1
nop.i 0
(p7) br.cond.spnt L(SCALBL_NAN_INF_ZERO)
};;
//
// Convert N to a fp integer (truncating)
// Create -35000
// GR_Scratch = 1 is shifted to 1 << 63 in the next group.
//
{ .mfi
addl GR_Scratch = 1,r0
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
addl GR_NBig = -35000,r0
}
;;
//
// Put N in a GP register
// Convert N_float_int back to a floating point value
// Create 35000
// Build the exponent Bias
//
{ .mii
getf.sig GR_N_as_int = FR_N_float_int
shl GR_Scratch = GR_Scratch,63
addl GR_Big = 35000,r0
}
{ .mfi
addl GR_Bias = 0x0FFFF,r0
fcvt.xf FR_N_float_int = FR_N_float_int
nop.i 0
};;
//
// Catch those fp values that are beyond 2**64-1:
//   p10 is set when the converted N equals 1 << 63, p9 otherwise
//   (1 << 63 is presumably the integer indefinite that fcvt.fx yields
//   for out-of-range input - confirm against the architecture manual).
// Is N >= 35000   (p6)
// Is N <= -35000  (p8)
//
{ .mfi
cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
nop.f 0
nop.i 0
}
{ .mmi
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
nop.i 0
};;
//
// Is N really an int?  Only checked (p9) when the conversion did not
// produce 1 << 63; p7 is set when N differs from its truncated value.
// Add the exponent bias to N.
//
{ .mfi
add GR_N_Biased = GR_Bias,GR_N_as_int
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
nop.i 0
};;
//
// Main path, create 2**N
// Branch and return if N is not an int (p7): the value returned in f8
// is frcpa(0/0).
//
{ .mfi
setf.exp FR_Two_N = GR_N_Biased
nop.i 999
}
{ .mfb
nop.m 0
(p7) frcpa f8,p11 = f0,f0
(p7) br.ret.spnt b0
};;
//
// When the integer conversion of N hit 1 << 63 (p10), redo the
// large/small decision from the sign of N: p6 if N >= 0, p8 otherwise.
//
{ .mfi
nop.m 999
(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
nop.i 0
};;
//
// Set denormal on denormal input x and denormal input N.
// These compares are issued under s0 (the user status field); their
// predicate results are not consumed later in this routine.
//
{ .mfi
nop.m 999
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
nop.i 999
}
{ .mfi
nop.m 999
fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
nop.i 0
};;
//
// Adjust 2**N if N was very small or very large:
//   p6 -> use FR_Big, p8 -> use FR_NBig.
// Meanwhile load the biased exponents used for the range checks:
//   0x33FFF -> -2**16384 (bit 17 of the setf.exp operand is the sign)
//   0x13FFF -> +2**16384
//
{ .mfi
nop.m 0
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch = 0x0000000000033FFF
};;
{ .mfi
nop.m 0
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch1= 0x0000000000013FFF
};;
// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
nop.m 999
(p0) fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
(p0) fsetc.s2 0x7F,0x42
nop.i 999
};;
//
// Do final operation: the same product x * 2**N is computed three times,
// once per status field (s0 -> FR_Result, s3 -> FR_Result3,
// s2 -> FR_Result2).  FR_NBig / FR_Big are reloaded with the
// range-check limits built above.
//
{ .mfi
setf.exp FR_NBig = GR_Scratch
fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
{ .mfi
setf.exp FR_Big = GR_Scratch1
fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
// Check for overflow or underflow.
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflow)
// S3 user supplied status + FZ + TD (Underflow)
//
//
// Restore s3
// Restore s2
//
{ .mfi
nop.m 0
fsetc.s3 0x7F,0x40
nop.i 999
}
{ .mfi
nop.m 0
fsetc.s2 0x7F,0x40
nop.i 999
};;
//
// Is the s3 result zero?  (p6)
// Is the s2 result >= FR_Big?  (p7 yes, p8 no)
// GR_Tag defaults to 51, the tag passed on the overflow path.
//
{ .mfi
nop.m 999
fclass.m.unc p6, p0 = FR_Result3, 0x007
nop.i 999
}
{ .mfi
addl GR_Tag = 51, r0
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
nop.i 0
};;
//
// Detect masked underflow - Tiny + Inexact Only:
// p6 stays set only if the s0 result differs from the s2 result.
//
{ .mfi
nop.m 999
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
nop.i 999
};;
//
// Is the result beyond the allowed range on the negative side?
//   (p9 set when FR_Result2 <= FR_NBig, only tested when p8)
// Branch out for underflow with GR_Tag = 52
//
{ .mfb
(p6) addl GR_Tag = 52, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt L(SCALBL_UNDERFLOW)
};;
//
// Branch out for overflow (positive: p7, negative: p9)
//
{ .mbb
nop.m 0
(p7) br.cond.spnt L(SCALBL_OVERFLOW)
(p9) br.cond.spnt L(SCALBL_OVERFLOW)
};;
//
// Return from main path.
//
{ .mfb
nop.m 999
nop.f 0
br.ret.sptk b0;;
}
//
// Special-value path: reached when x or N is NaN, Inf or zero.
//
L(SCALBL_NAN_INF_ZERO):
//
// Convert N to a fp integer
// Is N a NaN?  (p6)
//
{ .mfi
addl GR_Scratch = 1,r0
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
nop.i 999
}
{ .mfi
nop.m 0
fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
nop.i 0
};;
//
// Is x a NaN?  (p7)
// GR_Scratch becomes 1 << 63 for the integer check further down.
//
{ .mfi
nop.m 0
fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
shl GR_Scratch = GR_Scratch,63
};;
//
// Is N +Inf (p8) or -Inf (p9)?
//
{ .mfi
nop.m 0
fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
nop.i 0
}
{ .mfi
nop.m 0
fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
nop.i 0
};;
//
// Either X or N is a NaN: return N * x and possibly raise invalid.
//
{ .mfb
nop.m 0
(p6) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p6) br.ret.spnt b0
};;
{ .mfb
getf.sig GR_N_as_int = FR_N_float_int
(p7) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p7) br.ret.spnt b0
};;
//
// If N == +Inf, return x * N
// If N == -Inf, negate N first (fnma) so the divide below uses -N
//
{ .mfb
nop.m 0
(p8) fma.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
(p8) br.ret.spnt b0
}
{ .mfi
nop.m 0
(p9) fnma.s0 FR_Floating_N = FR_Floating_N, f1, f0
nop.i 0
};;
//
// If N==-Inf,return x/(-N)
//
{ .mfb
nop.m 0
(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
(p9) br.ret.spnt b0
};;
//
// Convert N_float_int to floating point value
// p9 is set when the integer form of N is not 1 << 63.
//
{ .mfi
cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
fcvt.xf FR_N_float_int = FR_N_float_int
nop.i 0
};;
//
// Is N an integer?  (p7 set when it is not)
//
{ .mfi
nop.m 0
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
nop.i 0
};;
//
// If N not an int, return NaN and raise invalid.
//
{ .mfb
nop.m 0
(p7) frcpa.s0 FR_Result,p6 = f0,f0
(p7) br.ret.spnt b0
};;
//
// Always return x in other path.
//
{ .mfb
nop.m 0
fma.s0 FR_Result = FR_Floating_X,f1,f0
br.ret.sptk b0
};;
.endp scalbl
ASM_SIZE_DIRECTIVE(scalbl)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__ieee754_scalbl)
#endif
//
// Error exit shared by the overflow and underflow paths of scalbl.
// Builds a 64-byte stack frame holding N (sp+32), x (sp+16) and the
// computed result (sp+48), passes their addresses in GR_Parameter_Y,
// GR_Parameter_X and GR_Parameter_RESULT to __libm_error_support
// (GR_Tag was set before the branch here), then reloads the result
// from the frame into FR_Result and returns to scalbl's caller.
//
.proc __libm_error_region
__libm_error_region:
L(SCALBL_OVERFLOW):
L(SCALBL_UNDERFLOW):
//
// Get stack address of N (old sp - 32, i.e. new sp + 32)
// Save ar.pfs
//
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs
}
//
// Adjust sp to create the 64-byte frame
// Save gp
//
{ .mfi
.fframe 64
add sp=-64,sp
nop.f 0
mov GR_SAVE_GP=gp
};;
//
// Store N on stack in correct position; the post-increment leaves
// GR_Parameter_Y pointing at the result slot (sp + 48)
// Locate the address of x on stack
// Save b0
//
{ .mmi
stfe [GR_Parameter_Y] = FR_Norm_N,16
add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0
};;
//
// Store x on the stack.
// Get address for result on stack.
//
.body
{ .mib
stfe [GR_Parameter_X] = FR_Norm_X
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
//
// Store the computed result in its slot, move GR_Parameter_Y back to
// the slot holding N, and call the error handler.
//
{ .mib
stfe [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack (recomputed after the call)
//
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
//
// Get the new result
// Release the frame and restore the return address
//
{ .mmi
ldfe FR_Result = [GR_Parameter_RESULT]
.restore sp
add sp = 64,sp
mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
mov gp = GR_SAVE_GP
mov ar.pfs = GR_SAVE_PFS
br.ret.sptk b0
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// The error handler itself is defined elsewhere.
.type __libm_error_support#,@function
.global __libm_error_support#

1310
sysdeps/ia64/fpu/e_sinh.S Normal file

File diff suppressed because it is too large Load Diff

1311
sysdeps/ia64/fpu/e_sinhf.S Normal file

File diff suppressed because it is too large Load Diff

1311
sysdeps/ia64/fpu/e_sinhl.S Normal file

File diff suppressed because it is too large Load Diff

347
sysdeps/ia64/fpu/e_sqrt.S Normal file
View File

@ -0,0 +1,347 @@
.file "sqrt.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// ********************************************************************
// History
// ********************************************************************
// 2/02/00 Initial version
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//
// ********************************************************************
//
// Function: Combined sqrt(x), where
// _
// sqrt(x) = |x, for double precision x values
//
// ********************************************************************
//
// Accuracy: Correctly Rounded
//
// ********************************************************************
//
// Resources Used:
//
// Floating-Point Registers: f8 (Input and Return Value)
// f6 -f7, f9 -f15
//
// General Purpose Registers:
// r2, r3 (Scratch, used to build constants)
// r32-r36 (Locals)
// r37-r40 (Used to pass arguments to error handling routine)
//
// Predicate Registers: p6, p7, p8
//
// *********************************************************************
//
// IEEE Special Conditions:
//
// All faults and exceptions should be raised correctly.
// sqrt(QNaN) = QNaN
// sqrt(SNaN) = QNaN
// sqrt(+/-0) = +/-0
// sqrt(negative) = QNaN and error handling is called
//
// *********************************************************************
//
// Implementation:
//
// Modified Newton-Raphson Algorithm
//
// *********************************************************************
#include "libm_support.h"
//
// General-register aliases used when calling the error handler.
//
// Addresses of the operands and of the result
GR_Parameter_RESULT = r39
GR_Parameter_Y = r38
GR_Parameter_X = r37
// Slots that preserve state across the call
GR_SAVE_GP = r35
GR_SAVE_B0 = r34
GR_SAVE_PFS = r33
//
// sqrt(x): double-precision square root.
// x arrives in f8 and the result is returned in f8.
// Under _LIBC the same entry point is also exported as __sqrt and
// __ieee754_sqrt.
//
// frsqrta produces the initial approximation y0 in f7 and sets p6 when
// the iteration below must run.  When p6 is clear the routine returns
// f7 directly, except for the inputs selected by fclass mask 0x3A (p7),
// which go to __libm_error_region with r40 = 49.
//
.section .text
.proc sqrt#
.global sqrt#
.align 64
sqrt:
#ifdef _LIBC
.global __sqrt
.type __sqrt,@function
__sqrt:
.global __ieee754_sqrt
.type __ieee754_sqrt,@function
__ieee754_sqrt:
#endif
{ .mfi
// 0 inputs, 5 locals (r32-r36), 4 outputs (r37-r40)
alloc r32= ar.pfs,0,5,4,0
frsqrta.s0 f7,p6=f8
nop.i 0
} { .mlx
// BEGIN DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
nop.m 0
// exponent of +1/2 in r2
movl r2 = 0x0fffe;;
} { .mmi
// +1/2 in f9
setf.exp f9 = r2
nop.m 0
nop.i 0
} { .mlx
nop.m 0
// 3/2 in r3
movl r3=0x3fc00000;;
} { .mfi
setf.s f10=r3
// Step (1)
// y0 = 1/sqrt(a) in f7 (produced by the frsqrta above)
// p7 = x matches class mask 0x3A, p8 = it does not.
// NOTE(review): 0x3A looks like negative non-zero values (the file
// header lists sqrt(negative) as the error case) - confirm the mask.
fclass.m.unc p7,p8 = f8,0x3A
nop.i 0;;
} { .mlx
nop.m 0
// 5/2 in r2
movl r2 = 0x40200000
} { .mlx
nop.m 0
// 63/8 in r3
movl r3 = 0x40fc0000;;
} { .mfi
setf.s f11=r2
// Step (2)
// h = +1/2 * y0 in f6
(p6) fma.s1 f6=f9,f7,f0
nop.i 0
} { .mfi
setf.s f12=r3
// Step (3)
// g = a * y0 in f7
(p6) fma.s1 f7=f8,f7,f0
nop.i 0
} { .mfi
nop.m 0
// keep a copy of the input x in f15 (f8 is overwritten with the
// result below); presumably consumed by the error path
mov f15 = f8
nop.i 0;;
} { .mlx
nop.m 0
// 231/16 in r2
movl r2 = 0x41670000;;
} { .mfi
setf.s f13=r2
// Step (4)
// e = 1/2 - g * h in f9
(p6) fnma.s1 f9=f7,f6,f9
nop.i 0
} { .mlx
nop.m 0
// 35/8 in r3
movl r3 = 0x408c0000;;
} { .mfi
setf.s f14=r3
// Step (5)
// S = 3/2 + 5/2 * e in f10
(p6) fma.s1 f10=f11,f9,f10
nop.i 0
} { .mfi
nop.m 0
// Step (6)
// e2 = e * e in f11
(p6) fma.s1 f11=f9,f9,f0
nop.i 0;;
} { .mfi
nop.m 0
// Step (7)
// t = 63/8 + 231/16 * e in f12
(p6) fma.s1 f12=f13,f9,f12
nop.i 0;;
} { .mfi
nop.m 0
// Step (8)
// S1 = e + e2 * S in f10
(p6) fma.s1 f10=f11,f10,f9
nop.i 0
} { .mfi
nop.m 0
// Step (9)
// e4 = e2 * e2 in f11
(p6) fma.s1 f11=f11,f11,f0
nop.i 0;;
} { .mfi
nop.m 0
// Step (10)
// t1 = 35/8 + e * t in f9
(p6) fma.s1 f9=f9,f12,f14
nop.i 0;;
} { .mfi
nop.m 0
// Step (11)
// G = g + S1 * g in f12
(p6) fma.s1 f12=f10,f7,f7
nop.i 0
} { .mfi
nop.m 0
// Step (12)
// E = g * e4 in f7
(p6) fma.s1 f7=f7,f11,f0
nop.i 0;;
} { .mfi
nop.m 0
// Step (13)
// u = S1 + e4 * t1 in f10
(p6) fma.s1 f10=f11,f9,f10
nop.i 0;;
} { .mfi
nop.m 0
// Step (14)
// g1 = G + t1 * E in f7
(p6) fma.d.s1 f7=f9,f7,f12
nop.i 0;;
} { .mfi
nop.m 0
// Step (15)
// h1 = h + u * h in f6
(p6) fma.s1 f6=f10,f6,f6
nop.i 0;;
} { .mfi
nop.m 0
// Step (16)
// d = a - g1 * g1 in f9
(p6) fnma.s1 f9=f7,f7,f8
nop.i 0;;
} { .mfb
nop.m 0
// Step (17)
// g2 = g1 + d * h1 in f8 (the return register), rounded to double
// under the user status field s0; return it.
(p6) fma.d.s0 f8=f9,f6,f7
(p6) br.ret.sptk b0 ;;
}
// p6 was clear: none of the steps above executed, so f7 still holds
// the value written by frsqrta.  Copy it to the return register and
// return unless the input matched class mask 0x3A.
{ .mfb
nop.m 0
(p0) mov f8 = f7
(p8) br.ret.sptk b0 ;;
}
// Input matched class mask 0x3A: place 49 in r40 (the last output
// register; presumably the error tag) and go to the error region.
{ .mfb
(p7) mov r40 = 49
nop.f 0
(p7) br.cond.sptk __libm_error_region ;;
}
// END DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
.endp sqrt#
ASM_SIZE_DIRECTIVE(sqrt)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__sqrt)
ASM_SIZE_DIRECTIVE(__ieee754_sqrt)
#endif
// Stack operations when calling error support.
// (1) (2) (3) (call) (4)
// sp -> + psp -> + psp -> + sp -> +
// | | | |
// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
// | | | |
// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
// | | | |
// | | <- GR_X X1 ->| |
// | | | |
// sp-64 -> + sp -> + sp -> + +
// save ar.pfs save b0 restore gp
// save gp restore ar.pfs
.proc __libm_error_region
__libm_error_region:
//
// Error stub for sqrt.  It is entered by the (p7) branch at the end of
// sqrt with r40 = error tag, f15 = original operand and f8 = the value
// sqrt would have returned.  The other special operands never get here;
// they return straight from sqrt through the (p8) br.ret.
//
// The stub builds a 64-byte frame, stores the operand, a zero second
// operand (f0) and the provisional result in it, passes the three
// addresses in r37-r39 to __libm_error_support, and returns in f8
// whatever that routine left in the result slot.
//
// Frame layout relative to the new sp (stores below are 8-byte stfd):
//   sp+16  parameter 1 (x)
//   sp+32  parameter 2 (y, stored from f0)
//   sp+48  parameter 3 (result)
//
.prologue
// The call lets __libm_error_support do whatever error reporting is
// needed for the tag in r40.
// (1)
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// (2)
{ .mmi
// The post-increment leaves GR_Parameter_Y pointing at the result slot
stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
// (3)
{ .mib
stfd [GR_Parameter_X] = f15 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
// Same instruction group as the bundle above: the store still sees the
// result-slot address, after which GR_Parameter_Y is moved back to
// parameter 2.
stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Recompute the result address after the call instead of reusing the
// value set up before it.
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
// (4)
{ .mmi
ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// The error handler is an external function
.type __libm_error_support#,@function
.global __libm_error_support#

266
sysdeps/ia64/fpu/e_sqrtf.S Normal file
View File

@ -0,0 +1,266 @@
.file "sqrtf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// *********************************************************************
// History:
//
// 2/02/00 Initial version
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//
// *********************************************************************
//
// Function: Combined sqrtf(x), where
// sqrtf(x) = square root of x, for single precision x values
//
// ********************************************************************
//
// Accuracy: Correctly Rounded
//
// ********************************************************************
//
// Resources Used:
//
// Floating-Point Registers: f8 (Input and Return Value)
// f7 -f14
//
// General Purpose Registers:
// r32-r36 (Locals)
// r37-r40 (Used to pass arguments to error handling routine)
//
// Predicate Registers: p6, p7, p8
//
// ********************************************************************
//
// IEEE Special Conditions:
//
// All faults and exceptions should be raised correctly.
// sqrtf(QNaN) = QNaN
// sqrtf(SNaN) = QNaN
// sqrtf(+/-0) = +/-0
// sqrtf(negative) = QNaN and error handling is called
//
// ********************************************************************
//
// Implementation:
//
// Modified Newton-Raphson Algorithm
//
// ********************************************************************
#include "libm_support.h"
// Register equates shared by sqrtf and by the error stub that follows it.
// The alloc in sqrtf creates 5 locals (r32-r36) and 4 outputs (r37-r40).
GR_SAVE_B0 = r34
GR_SAVE_PFS = r33
GR_SAVE_GP = r35
GR_Parameter_X = r37
GR_Parameter_Y = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
// FR_X: copy of the operand, FR_Y: constant f0 stored as the second
// operand, FR_RESULT: provisional and final return value.
FR_X = f13
FR_Y = f0
FR_RESULT = f8
.section .text
.proc sqrtf#
.global sqrtf#
.align 64
//
// sqrtf (aliases __sqrtf and __ieee754_sqrtf when _LIBC is defined)
//
// In:   f8 = x
// Out:  f8 = square root of x
// Uses: f7, f9-f13, r2, r32 (alloc), p6, p7, p8
//
// Flow: frsqrta writes a starting value to f7 and sets p6 when the
// refinement steps are required.  Every refinement instruction is
// predicated on p6 and the routine returns from step (11).  When p6 is
// clear, f7 is returned as is for p8 operands.  For the remaining (p7)
// operands no branch is taken at all: execution runs off the end of
// this procedure into the code placed next, which in this file is
// __libm_error_region.
//
sqrtf:
#ifdef _LIBC
.global __sqrtf
.type __sqrtf,@function
__sqrtf:
.global __ieee754_sqrtf
.type __ieee754_sqrtf,@function
__ieee754_sqrtf:
#endif
{ .mlx
// BEGIN SINGLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
// 0 inputs, 5 locals, 4 outputs, 0 rotating; previous ar.pfs lands in r32
alloc r32= ar.pfs,0,5,4,0
// exponent of +1/2 in r2
movl r2 = 0x0fffe
} { .mfi
nop.m 0
// Step (1)
// y0 = 1/sqrt(a) in f7, p6 = refinement needed
frsqrta.s0 f7,p6=f8
nop.i 0;;
} { .mfi
// +1/2 in f12
setf.exp f12 = r2
// Classify the operand: p7 = x matches class mask 0x3A, p8 = it does not.
// NOTE(review): 0x3A presumably selects negative nonzero non-NaN values,
// in line with "sqrtf(negative) = QNaN and error handling is called" in
// the file header - confirm against the fclass encoding.
fclass.m.unc p7,p8 = f8,0x3A
nop.i 0
} { .mfi
nop.m 0
// Keep the original operand in f13 (FR_X); the error stub stores it as
// parameter 1
mov f13 = f8
nop.i 0;;
} { .mfi
nop.m 0
// Step (2)
// H0 = 1/2 * y0 in f9
(p6) fma.s1 f9=f12,f7,f0
nop.i 0
} { .mfi
nop.m 0
// Step (3)
// S0 = a * y0 in f7
(p6) fma.s1 f7=f8,f7,f0
nop.i 0;;
} { .mfi
nop.m 0
// Step (4)
// d = 1/2 - S0 * H0 in f10
(p6) fnma.s1 f10=f7,f9,f12
nop.i 0
} { .mfi
nop.m 0
// Extra step, issued in the same group as step (4), which still reads
// the old f12 = 1/2:
// 3/2 = 1 + 1/2 in f12
(p6) fma.s1 f12=f12,f1,f1
nop.i 0;;
} { .mfi
nop.m 0
// Step (5)
// e = 1 + 3/2 * d in f12
(p6) fma.s1 f12=f12,f10,f1
nop.i 0
} { .mfi
nop.m 0
// Step (6)
// T0 = d * S0 in f11
(p6) fma.s1 f11=f10,f7,f0
nop.i 0;;
} { .mfi
nop.m 0
// Step (7)
// G0 = d * H0 in f10
(p6) fma.s1 f10=f10,f9,f0
nop.i 0;;
} { .mfi
nop.m 0
// Step (8)
// S1 = S0 + e * T0 in f7
(p6) fma.s.s1 f7=f12,f11,f7
nop.i 0;;
} { .mfi
nop.m 0
// Step (9)
// H1 = H0 + e * G0 in f12
(p6) fma.s1 f12=f12,f10,f9
nop.i 0;;
} { .mfi
nop.m 0
// Step (10)
// d1 = a - S1 * S1 in f9
(p6) fnma.s1 f9=f7,f7,f8
// NOTE(review): the third semicolon below looks like a typo for a plain
// stop (two semicolons); it should be harmless - confirm.
nop.i 0;;;
} { .mfb
nop.m 0
// Step (11)
// S = S1 + d1 * H1 in f8 (the return value, computed with fma.s.s0)
(p6) fma.s.s0 f8=f9,f12,f7
// Normal exit: the refined result is returned from here
(p6) br.ret.sptk b0 ;;
// END SINGLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
} { .mfb
// Reached only when p6 is clear: f7 still holds the value written by
// frsqrta.  Return it directly for p8 operands.
nop.m 0
(p0) mov f8 = f7
(p8) br.ret.sptk b0 ;;
}
//
// The (p8) return above covers all special operands that are not
// flagged by the class test; their result is the frsqrta value copied
// from f7.  The p7 operands continue into the error stub below.
//
.endp sqrtf
ASM_SIZE_DIRECTIVE(sqrtf)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__sqrtf)
ASM_SIZE_DIRECTIVE(__ieee754_sqrtf)
#endif
//
// Error stub for sqrtf.  sqrtf does not branch here: the stub is placed
// directly after it and is entered by fall-through when neither return
// in sqrtf is taken.  On entry FR_X (f13) holds the original operand
// and FR_RESULT (f8) the value sqrtf would have returned.
//
// The stub builds a 64-byte frame, stores the operand, a zero second
// operand (FR_Y = f0) and the provisional result in it, passes the
// three addresses in r37-r39 and the tag in r40 to
// __libm_error_support, and returns in f8 whatever that routine left in
// the result slot.
//
// Frame layout relative to the new sp (stores below are 4-byte stfs):
//   sp+16  parameter 1 (x)
//   sp+32  parameter 2 (y)
//   sp+48  parameter 3 (result)
//
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mii
add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
// Error tag for sqrtf.
// NOTE(review): the meaning of tag 50 is defined outside this file
// (presumably in libm_support.h) - confirm.
(p0) mov GR_Parameter_TAG = 50
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
// The post-increment leaves GR_Parameter_Y pointing at the result slot
stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
// Same instruction group as the bundle above: the store still sees the
// result-slot address, after which GR_Parameter_Y is moved back to
// parameter 2.
stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Bundle added 8/15/00 (see the file history): recompute the result
// address, because GR_Parameter_RESULT was overwritten by the call.
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// The error handler is an external function
.type __libm_error_support#,@function
.global __libm_error_support#

281
sysdeps/ia64/fpu/e_sqrtl.S Normal file
View File

@ -0,0 +1,281 @@
.file "sqrtl.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// ********************************************************************
//
// History:
// 2/02/00 (hand-optimized)
// 4/04/00 Unwind support added
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//
// ********************************************************************
//
// Function: Combined sqrtl(x), where
// sqrtl(x) = square root of x, for double-extended precision x values
//
// ********************************************************************
//
// Resources Used:
//
// Floating-Point Registers: f8 (Input and Return Value)
// f7, f9, f10, f12, f13, f15
//
// General Purpose Registers:
// r32-r36 (Locals)
// r37-r40 (Used to pass arguments to error handling routine)
//
// Predicate Registers: p6, p7, p8
//
// ********************************************************************
//
// IEEE Special Conditions:
//
// All faults and exceptions should be raised correctly.
// sqrtl(QNaN) = QNaN
// sqrtl(SNaN) = QNaN
// sqrtl(+/-0) = +/-0
// sqrtl(negative) = QNaN and error handling is called
//
// ********************************************************************
//
// Implementation:
//
// Modified Newton-Raphson Algorithm
//
// ********************************************************************
#include "libm_support.h"
// Register equates shared by sqrtl and by the error stub that follows it.
// The alloc in sqrtl creates 5 locals (r32-r36) and 4 outputs (r37-r40).
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
GR_SAVE_GP = r35
GR_Parameter_X = r37
GR_Parameter_Y = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
// FR_X: copy of the operand, FR_Y: constant f0 stored as the second
// operand, FR_RESULT: provisional and final return value.
FR_X = f15
FR_Y = f0
FR_RESULT = f8
.section .text
.proc sqrtl#
.global sqrtl#
.align 64
//
// sqrtl (aliases __sqrtl and __ieee754_sqrtl when _LIBC is defined)
//
// In:   f8 = x
// Out:  f8 = square root of x
// Uses: f7, f9, f10, f12, f13, f15, r2, r32 (alloc), r40, p6, p7, p8
//
// Flow: frsqrta writes a starting value to f7 and sets p6 when the
// refinement steps are required.  Every refinement instruction is
// predicated on p6 and the routine returns from step (15).  When p6 is
// clear, f7 is returned as is for p8 operands.  For the remaining (p7)
// operands no branch is taken at all: execution runs off the end of
// this procedure into the code placed next, which in this file is
// __libm_error_region.
//
sqrtl:
#ifdef _LIBC
.global __sqrtl
.type __sqrtl,@function
__sqrtl:
.global __ieee754_sqrtl
.type __ieee754_sqrtl,@function
__ieee754_sqrtl:
#endif
{ .mlx
// 0 inputs, 5 locals, 4 outputs, 0 rotating; previous ar.pfs lands in r32
alloc r32= ar.pfs,0,5,4,0
// exponent of +1/2 in r2
movl r2 = 0x0fffe;;
} { .mfi
// +1/2 in f12
setf.exp f12 = r2
// Step (1)
// y0 = 1/sqrt(a) in f7, p6 = refinement needed
frsqrta.s0 f7,p6=f8
nop.i 0;;
} { .mfi
nop.m 0
// Step (2)
// H0 = +1/2 * y0 in f9
(p6) fma.s1 f9=f12,f7,f0
nop.i 0
} { .mfi
nop.m 0
// Step (3)
// S0 = a * y0 in f7
(p6) fma.s1 f7=f8,f7,f0
nop.i 0;;
} { .mfi
nop.m 0
// Copy of the input x in f13.
// NOTE(review): nothing visible in this file reads f13 afterwards; the
// error stub uses the f15 copy made further down (FR_X = f15).
mov f13=f8
nop.i 0
} { .mfi
nop.m 0
// Classify the operand: p7 = x matches class mask 0x3A, p8 = it does not.
// NOTE(review): 0x3A presumably selects negative nonzero non-NaN values,
// in line with "sqrtl(negative) = QNaN and error handling is called" in
// the file header - confirm against the fclass encoding.
fclass.m.unc p7,p8 = f8,0x3A
nop.i 0;;
} { .mfi
nop.m 0
// Step (4)
// d0 = 1/2 - S0 * H0 in f10
(p6) fnma.s1 f10=f7,f9,f12
nop.i 0;;
}
{ .mfi
nop.m 0
// Keep the original operand in f15 (FR_X); the error stub stores it as
// parameter 1
(p0) mov f15=f8
nop.i 0;;
} { .mfi
nop.m 0
// Step (5)
// H1 = H0 + d0 * H0 in f9
(p6) fma.s1 f9=f10,f9,f9
nop.i 0
} { .mfi
nop.m 0
// Step (6)
// S1 = S0 + d0 * S0 in f7
(p6) fma.s1 f7=f10,f7,f7
nop.i 0;;
} { .mfi
nop.m 0
// Step (7)
// d1 = 1/2 - S1 * H1 in f10
(p6) fnma.s1 f10=f7,f9,f12
nop.i 0;;
} { .mfi
nop.m 0
// Step (8)
// H2 = H1 + d1 * H1 in f9
(p6) fma.s1 f9=f10,f9,f9
nop.i 0
} { .mfi
nop.m 0
// Step (9)
// S2 = S1 + d1 * S1 in f7
(p6) fma.s1 f7=f10,f7,f7
nop.i 0;;
} { .mfi
nop.m 0
// Step (10)
// d2 = 1/2 - S2 * H2 in f10
(p6) fnma.s1 f10=f7,f9,f12
nop.i 0
} { .mfi
nop.m 0
// Step (11)
// e2 = a - S2 * S2 in f12 (the 1/2 constant in f12 is no longer needed;
// step (10) in the same group still reads the old value)
(p6) fnma.s1 f12=f7,f7,f8
nop.i 0;;
} { .mfi
nop.m 0
// Step (12)
// S3 = S2 + e2 * H2 in f7
(p6) fma.s1 f7=f12,f9,f7
nop.i 0
} { .mfi
nop.m 0
// Step (13)
// H3 = H2 + d2 * H2 in f9 (step (12) in the same group still reads H2)
(p6) fma.s1 f9=f10,f9,f9
nop.i 0;;
} { .mfi
nop.m 0
// Step (14)
// e3 = a - S3 * S3 in f12
(p6) fnma.s1 f12=f7,f7,f8
nop.i 0;;
} { .mfb
nop.m 0
// Step (15)
// S = S3 + e3 * H3 in f8 (the return value, computed with fma.s0)
(p6) fma.s0 f8=f12,f9,f7
// Normal exit: the refined result is returned from here
(p6) br.ret.sptk b0 ;;
}
// Reached only when p6 is clear: f7 still holds the value written by
// frsqrta.  Return it directly for p8 operands.  The tag is loaded
// unconditionally so that it is already in place when execution
// continues into the error stub.
// NOTE(review): the meaning of tag 48 is defined outside this file
// (presumably in libm_support.h) - confirm.
{ .mfb
(p0) mov GR_Parameter_TAG = 48
(p0) mov f8 = f7
(p8) br.ret.sptk b0 ;;
}
//
// The (p8) return above covers all special operands that are not
// flagged by the class test; their result is the frsqrta value copied
// from f7.  The p7 operands continue into the error stub below.
//
// END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
.endp sqrtl#
ASM_SIZE_DIRECTIVE(sqrtl)
#ifdef _LIBC
ASM_SIZE_DIRECTIVE(__sqrtl)
ASM_SIZE_DIRECTIVE(__ieee754_sqrtl)
#endif
//
// Error stub for sqrtl.  sqrtl does not branch here: the stub is placed
// directly after it and is entered by fall-through when neither return
// in sqrtl is taken.  On entry GR_Parameter_TAG (r40) already holds the
// tag set by sqrtl, FR_X (f15) the original operand and FR_RESULT (f8)
// the value sqrtl would have returned.
//
// The stub builds a 64-byte frame, stores the operand, a zero second
// operand (FR_Y = f0) and the provisional result in it, passes the
// three addresses in r37-r39 to __libm_error_support, and returns in f8
// whatever that routine left in the result slot.
//
// Frame layout relative to the new sp (stores below use stfe, the
// extended format, in slots spaced 16 bytes apart):
//   sp+16  parameter 1 (x)
//   sp+32  parameter 2 (y)
//   sp+48  parameter 3 (result)
//
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
// The post-increment leaves GR_Parameter_Y pointing at the result slot
stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
// Same instruction group as the bundle above: the store still sees the
// result-slot address, after which GR_Parameter_Y is moved back to
// parameter 2.
stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Bundle added 8/15/00 (see the file history): recompute the result
// address, because GR_Parameter_RESULT was overwritten by the call.
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// The error handler is an external function
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -0,0 +1 @@
/* Not needed. */

View File

@ -0,0 +1 @@
/* Not needed. */

View File

@ -0,0 +1 @@
/* Not needed. */

View File

@ -1,31 +1,16 @@
# Begin of automatic generation
# acos
Test "acos (0.7) == 0.7953988301841435554":
float: 1
ifloat: 1
ildouble: 1150
ldouble: 1150
# acosh
Test "acosh (7) == 2.6339157938496334172":
ldouble: 1
ildouble: 1
# asin
Test "asin (-0.5) == -pi/6":
float: 2
ifloat: 2
Test "asin (0.5) == pi/6":
float: 2
ifloat: 2
Test "asin (0.7) == 0.77539749661075306374035335271498708":
float: 2
ifloat: 2
double: 1
idouble: 1
ildouble: 1147
ldouble: 1147
ldouble: 1
ildouble: 1
# asinh
Test "asinh (0.7) == 0.652666566082355786":
@ -33,17 +18,9 @@ ildouble: 656
ldouble: 656
# atan
Test "atan (0.7) == 0.6107259643892086165":
ildouble: 549
ldouble: 549
# atan2
Test "atan2 (0.4, 0.0003) == 1.5700463269355215718":
ildouble: 1
ldouble: 1
Test "atan2 (0.7, 1) == 0.6107259643892086165":
ildouble: 549
ldouble: 549
#Test "atan (0.7) == 0.6107259643892086165":
#ildouble: 549
#ldouble: 549
# atanh
Test "atanh (0.7) == 0.8673005276940531944":
@ -98,8 +75,8 @@ double: 1
float: 7
idouble: 1
ifloat: 7
ildouble: 5
ldouble: 5
ildouble: 6
ldouble: 6
Test "Imaginary part of: cacosh (-2 - 3 i) == -1.9833870299165354323 + 2.1414491111159960199 i":
double: 1
float: 4
@ -215,27 +192,9 @@ ildouble: 447
ldouble: 447
# cbrt
Test "cbrt (-0.001) == -0.1":
ildouble: 717
ldouble: 717
Test "cbrt (-27.0) == -3.0":
double: 1
idouble: 1
ildouble: 948
ldouble: 948
Test "cbrt (0.7) == 0.8879040017426007084":
double: 1
idouble: 1
ildouble: 346
ldouble: 346
Test "cbrt (0.970299) == 0.99":
double: 1
idouble: 1
ildouble: 306
ldouble: 306
Test "cbrt (8) == 2":
ildouble: 191
ldouble: 191
# ccos
Test "Real part of: ccos (-2 - 3 i) == -4.1896256909688072301 - 9.1092278937553365979 i":
@ -453,15 +412,17 @@ ldouble: 0.25
# cosh
Test "cosh (0.7) == 1.255169005630943018":
ildouble: 309
ldouble: 309
ildouble: 2
ldouble: 2
# cpow
Test "Real part of: cpow (2 + 3 i, 4 + 0 i) == -119.0 - 120.0 i":
double: 1
float: 4
float: 5
idouble: 1
ifloat: 4
ifloat: 5
ldouble: 1
ildouble: 1
Test "Imaginary part of: cpow (2 + 3 i, 4 + 0 i) == -119.0 - 120.0 i":
float: 2
ifloat: 2
@ -475,6 +436,9 @@ idouble: 1.104
ifloat: 2.5333
ildouble: 1
ldouble: 1
Test "Real part of: cpow (2 + 0 i, 10 + 0 i) == 1024.0 + 0.0 i":
ldouble: 1
ildouble: 1
# csin
Test "Real part of: csin (0.7 + 1.2 i) == 1.1664563419657581376 + 1.1544997246948547371 i":
@ -575,8 +539,8 @@ ldouble: 2
Test "Imaginary part of: ctanh (-2 - 3 i) == -0.9653858790221331242 + 0.0098843750383224937 i":
float: 1
ifloat: 1
ildouble: 23
ldouble: 23
ildouble: 24
ldouble: 24
Test "Real part of: ctanh (0 + pi/4 i) == 0.0 + 1.0 i":
Test "Imaginary part of: ctanh (0 + pi/4 i) == 0.0 + 1.0 i":
float: 1
@ -655,6 +619,8 @@ float: 1
ifloat: 1
double: 1
idouble: 1
ldouble: 1
ildouble: 1
# fmod
Test "fmod (-6.5, -2.3) == -1.9":
@ -906,21 +872,17 @@ ildouble: 725
ldouble: 725
# sin
Test "sin (0.7) == 0.64421768723769105367":
ildouble: 627
ldouble: 627
Test "sin (0.7) == 0.64421768723769105367261435139872014":
ildouble: 1
ldouble: 1
# sincos
Test "sincos (0.7, &sin_res, &cos_res) puts 0.64421768723769105367 in sin_res":
ildouble: 627
ldouble: 627
Test "sincos (0.7, &sin_res, &cos_res) puts 0.64421768723769105367261435139872014 in sin_res":
ldouble: 1
ildouble: 1
Test "sincos (0.7, &sin_res, &cos_res) puts 0.76484218728448842625585999019186495 in cos_res":
float: 1
ifloat: 1
double: 1
idouble: 1
ildouble: 528
ldouble: 528
Test "sincos (M_PI_6l*2.0, &sin_res, &cos_res) puts 0.5 in cos_res":
double: 1
float: 0.5
@ -1005,6 +967,8 @@ float: 1
ifloat: 1
double: 2
idouble: 2
ldouble: 2
ildouble: 2
Test "y0 (1.0) == 0.088256964215676957983":
double: 2
float: 1
@ -1028,6 +992,8 @@ float: 1
ifloat: 1
double: 1
idouble: 1
ldouble: 1
ildouble: 1
# y1
Test "y1 (0.1) == -6.4589510947020269877":
@ -1174,17 +1140,11 @@ idouble: 1
ifloat: 1
# Maximal error of functions:
Function: "acos":
ildouble: 1149
ldouble: 1149
Function: "asin":
float: 2
ifloat: 2
double: 1
idouble: 1
ildouble: 1147
ldouble: 1147
ldouble: 1
ildouble: 1
Function: "asinh":
double: 1
@ -1192,14 +1152,6 @@ idouble: 1
ildouble: 656
ldouble: 656
Function: "atan":
ildouble: 549
ldouble: 549
Function: "atan2":
ildouble: 549
ldouble: 549
Function: "atanh":
double: 1
idouble: 1
@ -1305,8 +1257,6 @@ ldouble: 447
Function: "cbrt":
double: 1
idouble: 1
ildouble: 948
ldouble: 948
Function: Real part of "ccos":
double: 1
@ -1389,22 +1339,24 @@ ildouble: 529
ldouble: 529
Function: "cosh":
ildouble: 309
ldouble: 309
ildouble: 2
ldouble: 2
Function: Real part of "cpow":
double: 1
float: 4
float: 5
idouble: 1
ifloat: 4
ifloat: 5
ldouble: 1
ildouble: 1
Function: Imaginary part of "cpow":
double: 1.104
float: 2.5333
idouble: 1.104
ifloat: 2.5333
ildouble: 2
ldouble: 2
ildouble: 4
ldouble: 4
Function: Real part of "csin":
float: 1
@ -1639,6 +1591,8 @@ double: 2
float: 1
idouble: 2
ifloat: 1
ldouble: 2
ildouble: 2
Function: "y1":
double: 3

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,185 @@
.file "libm_frexp_4.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00: Initial version
// 3/20/00: Improved speed
// 6/01/00: Fixed bug when x a double-extended denormal
// 12/08/00 Corrected label on .endp
//
// API
//==============================================================
// double frexp(double x, int* y)
// double __libm_frexp_4(double x, int* y)
// where int* y is a 32-bit integer
//
// Overview of operation
//==============================================================
// break a floating point x number into fraction and an exponent
// The fraction is returned as a double
// The exponent is returned as an integer pointed to by y
// This is a true (unbiased) exponent, but 0xfffe is subtracted
// as the bias instead of 0xffff. This is because the fraction returned
// is between 0.5 and 1.0, not the usual IEEE range of 1.0 to 2.0.
//
// The fraction is 0.5 <= fraction < 1.0
//
// Registers used
//==============================================================
//
// general registers:
// r14 exponent bias for x negative
// r15 exponent bias for x positive
// r16 signexp of x
// r17 exponent mask
// r18 exponent of x
// r19 exponent result
// r20 signexp of 2^64
// r32 on input contains the 64-bit IEEE double that is in f8
// r33 on input pointer to 32-bit integer for exponent
//
// predicate registers:
// p6 set if x is Nan, zero, or infinity
// p7 set if x negative
// p8 set if x positive
// p9 set if x double-extended denormal
//
// floating-point registers:
// f8 input, output
// f9 normalized x
// f10 signexp for significand result for x positive
// f11 signexp for significand result for x negative
// f12 2^64
#include "libm_support.h"
.align 32
.global __libm_frexp_4#
.section .text
.proc __libm_frexp_4#
.align 32
//
// __libm_frexp_4
//
// In:   f8  = x
//       r33 = pointer to the 32-bit integer that receives the exponent
// Out:  f8  = fraction with the sign of x, 0.5 <= |fraction| < 1.0
//       [r33] = exponent, or 0 when x is a NaN, zero or infinity
//               (x itself is returned unchanged in that case)
// Uses: r14-r20, f9-f12, p6-p9
//
__libm_frexp_4:
// r15 = signexp of +0.5 (exponent 0xfffe, sign clear); it doubles as
//       the bias subtracted from the exponent of x at the end
// p6  = x is a NaN, zero, or infinity (class mask 0xe7)
// r14 = signexp of -0.5 (same exponent with the sign bit set)
{ .mfi
(p0) mov r15 = 0x0fffe
(p0) fclass.m.unc p6,p0 = f8, 0xe7
(p0) mov r14 = 0x2fffe
}
// Form signexp of 2^64 in case x double-extended denormal
// Save the normalized value of input in f9
// The normalization also sets fault flags and takes faults if necessary
{ .mfi
(p0) mov r20 = 0x1003f
(p0) fnorm f9 = f8
nop.i 999 ;;
}
// Move signexp for significand result for x>0 to FP reg
// Form 2^64 in case x double-extended denormal
{ .mmi
(p0) setf.exp f10 = r15
(p0) setf.exp f12 = r20
nop.i 999 ;;
}
// Move signexp for significand result for x<0 to FP reg
// If x NAN, ZERO, INFINITY, set *y=0 as a 32-bit integer, and exit
// (f8 has not been modified, so x is returned as is)
{ .mmb
(p0) setf.exp f11 = r14
(p6) st4 [r33] = r0
(p6) br.ret.spnt b0 ;;
}
// Form exponent mask
// p7 if x<0, else p8
{ .mfi
(p0) mov r17 = 0x1ffff
(p0) fcmp.lt.unc p7,p8 = f8,f0
nop.i 999 ;;
}
// Test for fnorm(x) denormal, means x double-extended denormal
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f9, 0x0b
nop.i 999 ;;
}
// If x double-extended denormal add 64 to exponent bias for scaling
// If x double-extended denormal multiply x * 2^64 which is normal
// (f10 was loaded from r15 before this adjustment, so the significand
// result still gets the exponent of 0.5)
{ .mfi
(p9) add r15 = 64, r15
(p9) fmpy f9 = f9, f12
nop.i 999 ;;
}
// If x>0 form significand result: sign and exponent taken from f10,
// significand taken from the normalized x in f9.
// The exponent used is 0xfffe instead of the normal bias 0xffff
// because we want the significand result to be in the range
// 0.5 <= sig < 1.0
{ .mfi
nop.m 999
(p8) fmerge.se f8 = f10,f9
nop.i 999 ;;
}
// Get signexp of normalized x
// If x<0 form significand result
{ .mfi
(p0) getf.exp r16 = f9
(p7) fmerge.se f8 = f11,f9
nop.i 999 ;;
}
// Get exp of normalized x (mask off the sign bit)
// Subtract off bias to get true exponent of x
// The stop after the first slot lets the sub consume r18 from the and.
{ .mmi
(p0) and r18 = r17,r16 ;;
(p0) sub r19 = r18,r15
nop.i 999 ;;
}
// Store int y as a 32-bit integer
// Make the value a double
{ .mfb
(p0) st4 [r33] = r19
(p0) fnorm.d f8 = f8
(p0) br.ret.sptk b0 ;;
}
.endp __libm_frexp_4
ASM_SIZE_DIRECTIVE(__libm_frexp_4)

View File

@ -0,0 +1,185 @@
.file "libm_frexp_4f.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00: Initial version
// 3/20/00: Improved speed
// 6/01/00: Fixed bug when x a double-extended denormal
// 12/08/00 Corrected label on .endp
//
// API
//==============================================================
// float frexpf(float x, int* y)
// float __libm_frexp_4f(float x, int* y)
// where int* y is a 32-bit integer
//
// Overview of operation
//==============================================================
// break a floating point x number into fraction and an exponent
// The fraction is returned as a float
// The exponent is returned as an integer pointed to by y
// This is a true (unbiased) exponent, but 0xfffe is subtracted
// as the bias instead of 0xffff. This is because the fraction returned
// is between 0.5 and 1.0, not the usual IEEE range of 1.0 to 2.0.
//
// The fraction is 0.5 <= fraction < 1.0
//
// Registers used
//==============================================================
// general registers:
// r14 exponent bias for x negative
// r15 exponent bias for x positive
// r16 signexp of x
// r17 exponent mask
// r18 exponent of x
// r19 exponent result
// r20 signexp of 2^64
// r32 on input contains the 32-bit IEEE float that is in f8
// r33 on input pointer to 32-bit integer for exponent
// predicate registers:
// p6 set if x is Nan, zero, or infinity
// p7 set if x negative
// p8 set if x positive
// p9 set if x double-extended denormal
// floating-point registers:
// f8 input, output
// f9 normalized x
// f10 signexp for significand result for x positive
// f11 signexp for significand result for x negative
// f12 2^64
#include "libm_support.h"
.align 32
.global __libm_frexp_4f#
.section .text
.proc __libm_frexp_4f#
.align 32
//
// __libm_frexp_4f
//
// In:   f8  = x
//       r33 = pointer to the 32-bit integer that receives the exponent
// Out:  f8  = fraction with the sign of x, 0.5 <= |fraction| < 1.0
//       [r33] = exponent, or 0 when x is a NaN, zero or infinity
//               (x itself is returned unchanged in that case)
// Uses: r14-r20, f9-f12, p6-p9
//
__libm_frexp_4f:
// r15 = signexp of +0.5 (exponent 0xfffe, sign clear); it doubles as
//       the bias subtracted from the exponent of x at the end
// p6  = x is a NaN, zero, or infinity (class mask 0xe7)
// r14 = signexp of -0.5 (same exponent with the sign bit set)
{ .mfi
(p0) mov r15 = 0x0fffe
(p0) fclass.m.unc p6,p0 = f8, 0xe7
(p0) mov r14 = 0x2fffe
}
// Form signexp of 2^64 in case x double-extended denormal
// Save the normalized value of input in f9
// The normalization also sets fault flags and takes faults if necessary
{ .mfi
(p0) mov r20 = 0x1003f
(p0) fnorm f9 = f8
nop.i 999 ;;
}
// Move signexp for significand result for x>0 to FP reg
// Form 2^64 in case x double-extended denormal
{ .mmi
(p0) setf.exp f10 = r15
(p0) setf.exp f12 = r20
nop.i 999 ;;
}
// Move signexp for significand result for x<0 to FP reg
// If x NAN, ZERO, INFINITY, set *y=0 as a 32-bit integer, and exit
// (f8 has not been modified, so x is returned as is)
{ .mmb
(p0) setf.exp f11 = r14
(p6) st4 [r33] = r0
(p6) br.ret.spnt b0 ;;
}
// Form exponent mask
// p7 if x<0, else p8
{ .mfi
(p0) mov r17 = 0x1ffff
(p0) fcmp.lt.unc p7,p8 = f8,f0
nop.i 999 ;;
}
// Test for fnorm(x) denormal, means x double-extended denormal
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f9, 0x0b
nop.i 999 ;;
}
// If x double-extended denormal add 64 to exponent bias for scaling
// If x double-extended denormal multiply x * 2^64 which is normal
// (f10 was loaded from r15 before this adjustment, so the significand
// result still gets the exponent of 0.5)
{ .mfi
(p9) add r15 = 64, r15
(p9) fmpy f9 = f9, f12
nop.i 999 ;;
}
// If x>0 form significand result: sign and exponent taken from f10,
// significand taken from the normalized x in f9.
// The exponent used is 0xfffe instead of the normal bias 0xffff
// because we want the significand result to be in the range
// 0.5 <= sig < 1.0
{ .mfi
nop.m 999
(p8) fmerge.se f8 = f10,f9
nop.i 999 ;;
}
// Get signexp of normalized x
// If x<0 form significand result
{ .mfi
(p0) getf.exp r16 = f9
(p7) fmerge.se f8 = f11,f9
nop.i 999 ;;
}
// Get exp of normalized x (mask off the sign bit)
// Subtract off bias to get true exponent of x
// The stop after the first slot lets the sub consume r18 from the and.
{ .mmi
(p0) and r18 = r17,r16 ;;
(p0) sub r19 = r18,r15
nop.i 999 ;;
}
// Store int y as a 32-bit integer
// Make the value a float
{ .mfb
(p0) st4 [r33] = r19
(p0) fnorm.s f8 = f8
(p0) br.ret.sptk b0 ;;
}
.endp __libm_frexp_4f
ASM_SIZE_DIRECTIVE(__libm_frexp_4f)

View File

@ -0,0 +1,184 @@
.file "libm_frexp_4l.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 3/20/00: Initial version
// 6/01/00: Fixed bug when x a double-extended denormal
// 12/08/00 Corrected label on .endp
//
// API
//==============================================================
// long double frexpl(long double x, int* y)
// long double __libm_frexp_4l(long double x, int* y)
// where int* y is a 32-bit integer
//
// Overview of operation
//==============================================================
// Break a floating-point number x into a fraction and an exponent.
// The fraction is returned as a long double.
// The exponent is returned as an integer pointed to by y.
// This is a true (not a biased) exponent, but 0xfffe is subtracted
// as the bias instead of 0xffff. This is because the fraction returned
// is between 0.5 and 1.0, not the expected IEEE range.
//
// The fraction is 0.5 <= fraction < 1.0
//
// Registers used
//==============================================================
//
// general registers:
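// r14 signexp for significand result for x negative
// r15 signexp for significand result for x positive; also the exponent
//     bias subtracted from the exponent of x (plus 64 if x is denormal)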
// r16 signexp of x
// r17 exponent mask
// r18 exponent of x
// r19 exponent result
// r20 signexp of 2^64
// r32-33 on input contains the 80-bit IEEE long double that is in f8
// r34 on input pointer to 32-bit integer for exponent
//
// predicate registers:
// p6 set if x is Nan, zero, or infinity
// p7 set if x negative
// p8 set if x positive
// p9 set if x double-extended denormal
//
// floating-point registers:
// f8 input, output
// f9 normalized x
// f10 signexp for significand result for x positive
// f11 signexp for significand result for x negative
// f12 2^64
#include "libm_support.h"
// long double __libm_frexp_4l(long double x, int* y)
//
// Splits x (in f8) into a fraction, returned in f8, and an exponent that is
// stored as a 32-bit integer through the pointer in r34.
// If x is a NaN, zero, or infinity it is returned unchanged and 0 is stored.
.align 32
.global __libm_frexp_4l#
.section .text
.proc __libm_frexp_4l#
.align 32
__libm_frexp_4l:
// r15 = signexp for significand result for x>0; the same value is the bias
//       subtracted from the exponent of x further down
// p6  = x is a NaN, zero, or infinity
// r14 = signexp for significand result for x<0
{ .mfi
(p0) mov r15 = 0x0fffe
(p0) fclass.m.unc p6,p0 = f8, 0xe7
(p0) mov r14 = 0x2fffe
}
// r20 = signexp of 2^64, used only if x is a double-extended denormal
// f9  = normalized value of the input
// The normalization also sets fault flags and takes faults if necessary
{ .mfi
(p0) mov r20 = 0x1003f
(p0) fnorm f9 = f8
nop.i 999 ;;
}
// f10 = signexp for significand result for x>0
// f12 = 2^64 in case x is a double-extended denormal
{ .mmi
(p0) setf.exp f10 = r15
(p0) setf.exp f12 = r20
nop.i 999 ;;
}
// f11 = signexp for significand result for x<0
// If x NAN, ZERO, INFINITY, set *y=0 as a 32-bit integer, and exit
// (f8 still holds the unmodified input at this point)
{ .mmb
(p0) setf.exp f11 = r14
(p6) st4 [r34] = r0
(p6) br.ret.spnt b0 ;;
}
// r17 = exponent mask (low 17 bits of a signexp)
// p7 if x<0, else p8
{ .mfi
(p0) mov r17 = 0x1ffff
(p0) fcmp.lt.unc p7,p8 = f8,f0
nop.i 999 ;;
}
// p9 = fnorm(x) is still denormal, which means x is a double-extended denormal
{ .mfi
nop.m 999
(p0) fclass.m.unc p9,p0 = f9, 0x0b
nop.i 999 ;;
}
// If x double-extended denormal add 64 to exponent bias for scaling
// If x double-extended denormal multiply x * 2^64 which is normal
{ .mfi
(p9) add r15 = 64, r15
(p9) fmpy f9 = f9, f12
nop.i 999 ;;
}
// If x>0 form significand result: sign and exponent come from f10,
// the significand from f9.
// The exponent placed in the result is 0xfffe instead of the normal
// 0xffff because we want the significand to be in the range
// 0.5 <= sig < 1.0
{ .mfi
nop.m 999
(p8) fmerge.se f8 = f10,f9
nop.i 999 ;;
}
// r16 = signexp of normalized (and, for denormals, scaled) x
// If x<0 form significand result
{ .mfi
(p0) getf.exp r16 = f9
(p7) fmerge.se f8 = f11,f9
nop.i 999 ;;
}
// r18 = exp of normalized x (sign bit masked off)
// r19 = r18 - bias = true exponent of x
{ .mmi
(p0) and r18 = r17,r16 ;;
(p0) sub r19 = r18,r15
nop.i 999 ;;
}
// Store the true exponent at the pointer in r34 as a 32-bit integer
// Make the value a long double
{ .mfb
(p0) st4 [r34] = r19
(p0) fnorm f8 = f8
(p0) br.ret.sptk b0 ;;
}
.endp __libm_frexp_4l
ASM_SIZE_DIRECTIVE(__libm_frexp_4l)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,339 @@
//
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History: 02/02/2000 Initial version
// 2/28/2000 added tags for logb and nextafter
// 3/22/2000 Changes to support _LIB_VERSION variable
// and filled some enum gaps. Added support for C99.
// 5/31/2000 added prototypes for __libm_frexp_4l/8l
// 8/10/2000 Changed declaration of _LIB_VERSION to work for library
// builds and other application builds (precompiler directives).
// 8/11/2000 Added pointers-to-matherr-functions declarations to allow
// for user-defined matherr functions in the dll build.
// 12/07/2000 Added scalbn error_types values.
//
#ifndef ASSEMBLER
#include <math.h>
/* Internal libm helpers defined elsewhere in the library.
   NOTE(review): the _4 and _8 frexp variants appear to differ in the width
   of the integer written through exp (32 vs 64 bit); only the _4 variants
   are visible alongside this header -- confirm for _8.  */
float __libm_frexp_4f( float x, int* exp);
float __libm_frexp_8f( float x, int* exp);
double __libm_frexp_4( double x, int* exp);
double __libm_frexp_8( double x, int* exp);
long double __libm_frexp_4l( long double x, int* exp);
long double __libm_frexp_8l( long double x, int* exp);
void __libm_sincos_pi4(double,double*,double*,int);
void __libm_y0y1(double , double *, double *);
void __libm_j0j1(double , double *, double *);
double __libm_lgamma_kernel(double,int*,int,int);
double __libm_j0(double);
double __libm_j1(double);
double __libm_jn(int,double);
double __libm_y0(double);
double __libm_y1(double);
double __libm_yn(int,double);
/* Explicit declarations of the math functions used by the C parts of the
   library: double precision.  Each function is declared exactly once.  */
extern double rint(double);
extern double sqrt(double);
extern double fabs(double);
extern double log(double);
extern double log1p(double);
extern double sin(double);
extern double exp(double);
extern double modf(double, double *);
extern double asinh(double);
extern double acosh(double);
extern double atanh(double);
extern double tanh(double);
extern double erf(double);
extern double erfc(double);
extern double j0(double);
extern double j1(double);
extern double jn(int, double);
extern double y0(double);
extern double y1(double);
extern double yn(int, double);
/* Single precision.  */
extern float fabsf(float);
extern float asinhf(float);
extern float acoshf(float);
extern float atanhf(float);
extern float tanhf(float);
extern float erff(float);
extern float erfcf(float);
extern float j0f(float);
extern float j1f(float);
extern float jnf(int, float);
extern float y0f(float);
extern float y1f(float);
extern float ynf(int, float);
/* Extended precision.  */
extern long double log1pl(long double);
extern long double logl(long double);
extern long double sqrtl(long double);
extern long double expl(long double);
/* Rounding to integer.  */
extern long lround(double);
extern long lroundf(float);
extern long lroundl(long double);
/* Build-time guard: C translation units that include this header must
   define either SIZE_INT_32 or SIZE_INT_64 beforehand.  */
#if !(defined(SIZE_INT_32) || defined(SIZE_INT_64))
#error integer size not established; define SIZE_INT_32 or SIZE_INT_64
#endif
/* Bit-field view of a 64-bit double.  The low significand word is declared
   first; NOTE(review): the actual bit placement depends on the compiler's
   bit-field allocation order -- confirm for each target.  */
struct fp64 { /* sign:1 exponent:11 significand:52 (implied leading 1) */
unsigned lo_significand:32;
unsigned hi_significand:20;
unsigned exponent:11;
unsigned sign:1;
};
/* True if the high significand field of *X is below the hex constant HI.
   HI is pasted after "0x", so pass bare hex digits (no 0x prefix).  */
#define HI_SIGNIFICAND_LESS(X, HI) ((X)->hi_significand < 0x ## HI)
/* Absolute value of x.  The argument is evaluated twice, so it must not
   have side effects.  */
#define f64abs(x) ((x) < 0.0 ? -(x) : (x))
/* Tags identifying which function raised which error condition; this is the
   type of the last parameter of __libm_error_support.  The enumerators are
   numbered consecutively from 0 and the trailing comments give the value
   of each one.  Since the values follow from the position in the list, new
   tags must be appended at the end to keep the existing values stable.  */
typedef enum
{
logl_zero=0, logl_negative, /* 0, 1 */
log_zero, log_negative, /* 2, 3 */
logf_zero, logf_negative, /* 4, 5 */
log10l_zero, log10l_negative, /* 6, 7 */
log10_zero, log10_negative, /* 8, 9 */
log10f_zero, log10f_negative, /* 10, 11 */
expl_overflow, expl_underflow, /* 12, 13 */
exp_overflow, exp_underflow, /* 14, 15 */
expf_overflow, expf_underflow, /* 16, 17 */
powl_overflow, powl_underflow, /* 18, 19 */
powl_zero_to_zero, /* 20 */
powl_zero_to_negative, /* 21 */
powl_neg_to_non_integer, /* 22 */
powl_nan_to_zero, /* 23 */
pow_overflow, pow_underflow, /* 24, 25 */
pow_zero_to_zero, /* 26 */
pow_zero_to_negative, /* 27 */
pow_neg_to_non_integer, /* 28 */
pow_nan_to_zero, /* 29 */
powf_overflow, powf_underflow, /* 30, 31 */
powf_zero_to_zero, /* 32 */
powf_zero_to_negative, /* 33 */
powf_neg_to_non_integer, /* 34 */
powf_nan_to_zero, /* 35 */
atan2l_zero, /* 36 */
atan2_zero, /* 37 */
atan2f_zero, /* 38 */
expm1l_overflow, /* 39 */
expm1l_underflow, /* 40 */
expm1_overflow, /* 41 */
expm1_underflow, /* 42 */
expm1f_overflow, /* 43 */
expm1f_underflow, /* 44 */
hypotl_overflow, /* 45 */
hypot_overflow, /* 46 */
hypotf_overflow, /* 47 */
sqrtl_negative, /* 48 */
sqrt_negative, /* 49 */
sqrtf_negative, /* 50 */
scalbl_overflow, scalbl_underflow, /* 51, 52 */
scalb_overflow, scalb_underflow, /* 53, 54 */
scalbf_overflow, scalbf_underflow, /* 55, 56 */
acosl_gt_one, acos_gt_one, acosf_gt_one, /* 57, 58, 59 */
asinl_gt_one, asin_gt_one, asinf_gt_one, /* 60, 61, 62 */
coshl_overflow, cosh_overflow, coshf_overflow, /* 63, 64, 65 */
y0l_zero, y0l_negative,y0l_gt_loss, /* 66, 67, 68 */
y0_zero, y0_negative,y0_gt_loss, /* 69, 70, 71 */
y0f_zero, y0f_negative,y0f_gt_loss, /* 72, 73, 74 */
y1l_zero, y1l_negative,y1l_gt_loss, /* 75, 76, 77 */
y1_zero, y1_negative,y1_gt_loss, /* 78, 79, 80 */
y1f_zero, y1f_negative,y1f_gt_loss, /* 81, 82, 83 */
ynl_zero, ynl_negative,ynl_gt_loss, /* 84, 85, 86 */
yn_zero, yn_negative,yn_gt_loss, /* 87, 88, 89 */
ynf_zero, ynf_negative,ynf_gt_loss, /* 90, 91, 92 */
j0l_gt_loss, /* 93 */
j0_gt_loss, /* 94 */
j0f_gt_loss, /* 95 */
j1l_gt_loss, /* 96 */
j1_gt_loss, /* 97 */
j1f_gt_loss, /* 98 */
jnl_gt_loss, /* 99 */
jn_gt_loss, /* 100 */
jnf_gt_loss, /* 101 */
lgammal_overflow, lgammal_negative,lgammal_reserve, /* 102, 103, 104 */
lgamma_overflow, lgamma_negative,lgamma_reserve, /* 105, 106, 107 */
lgammaf_overflow, lgammaf_negative, lgammaf_reserve,/* 108, 109, 110 */
gammal_overflow,gammal_negative, gammal_reserve, /* 111, 112, 113 */
gamma_overflow, gamma_negative, gamma_reserve, /* 114, 115, 116 */
gammaf_overflow,gammaf_negative,gammaf_reserve, /* 117, 118, 119 */
fmodl_by_zero, /* 120 */
fmod_by_zero, /* 121 */
fmodf_by_zero, /* 122 */
remainderl_by_zero, /* 123 */
remainder_by_zero, /* 124 */
remainderf_by_zero, /* 125 */
sinhl_overflow, sinh_overflow, sinhf_overflow, /* 126, 127, 128 */
atanhl_gt_one, atanhl_eq_one, /* 129, 130 */
atanh_gt_one, atanh_eq_one, /* 131, 132 */
atanhf_gt_one, atanhf_eq_one, /* 133, 134 */
acoshl_lt_one, /* 135 */
acosh_lt_one, /* 136 */
acoshf_lt_one, /* 137 */
log1pl_zero, log1pl_negative, /* 138, 139 */
log1p_zero, log1p_negative, /* 140, 141 */
log1pf_zero, log1pf_negative, /* 142, 143 */
ldexpl_overflow, ldexpl_underflow, /* 144, 145 */
ldexp_overflow, ldexp_underflow, /* 146, 147 */
ldexpf_overflow, ldexpf_underflow, /* 148, 149 */
logbl_zero, logb_zero, logbf_zero, /* 150, 151, 152 */
nextafterl_overflow, nextafter_overflow,
nextafterf_overflow, /* 153, 154, 155 */
ilogbl_zero, ilogb_zero, ilogbf_zero, /* 156, 157, 158 */
exp2l_overflow, exp2l_underflow, /* 159, 160 */
exp2_overflow, exp2_underflow, /* 161, 162 */
exp2f_overflow, exp2f_underflow, /* 163, 164 */
exp10l_overflow, exp10_overflow,
exp10f_overflow, /* 165, 166, 167 */
log2l_zero, log2l_negative, /* 168, 169 */
log2_zero, log2_negative, /* 170, 171 */
log2f_zero, log2f_negative, /* 172, 173 */
scalbnl_overflow, scalbnl_underflow, /* 174, 175 */
scalbn_overflow, scalbn_underflow, /* 176, 177 */
scalbnf_overflow, scalbnf_underflow /* 178, 179 */
} error_types;
/* Common error handler.  The last argument is the error tag; the three
   pointers are untyped here.  NOTE(review): presumably they point to the
   two arguments and the result of the failing call -- confirm against the
   definition of __libm_error_support.  */
void __libm_error_support(void*,void*,void*,error_types);
/* NOTE(review): these look like the exponent bias and the all-ones exponent
   value of the 11-bit exponent field of a double -- confirm at the users.  */
#define BIAS_64 1023
#define EXPINF_64 2047
/* Expands to two 32-bit hex initializers, low word first, then high word.
   Both arguments are bare hex digits (they are pasted after "0x").  */
#define DOUBLE_HEX(HI, LO) 0x ## LO, 0x ## HI
/* Everything in this #if 0 block is compiled out: constants and macros for
   building infinities and an invalid result from bit patterns, plus a
   threshold named X_TLOSS.  */
#if 0
static const unsigned INF[] = {
DOUBLE_HEX(7ff00000, 00000000),
DOUBLE_HEX(fff00000, 00000000)
};
static const double _zeroo = 0.0;
static const double _bigg = 1.0e300;
static const double _ponee = 1.0;
static const double _nonee = -1.0;
#define INVALID (_zeroo * *((double*)&INF[0]))
#define PINF *((double*)&INF[0])
#define NINF -PINF
#define PINF_DZ (_ponee/_zeroo)
#define X_TLOSS 1.41484755040568800000e+16
#endif
/* Argument records for the matherr-style handlers declared in this header
   (MATHERR_F, MATHERR_D, matherrl), one record type per precision.  */
/* Single-precision record.  */
struct exceptionf
{
int type;
char *name;
float arg1, arg2, retval;
};
/* Double-precision record.  In C++ the struct is named __exception; in C
   it is named exception and is only defined here when _LIBC is not
   defined.  */
# ifdef __cplusplus
struct __exception
{
int type;
char *name;
double arg1, arg2, retval;
};
# else
# ifndef _LIBC
struct exception
{
int type;
char *name;
double arg1, arg2, retval;
};
# endif
# endif
/* Extended-precision record.  */
struct exceptionl
{
int type;
char *name;
long double arg1, arg2, retval;
};
/* Names of the single- and double-precision matherr handlers: they carry a
   leading underscore when _MS_ is defined.  */
#ifdef _MS_
#define MATHERR_F _matherrf
#define MATHERR_D _matherr
#else
#define MATHERR_F matherrf
#define MATHERR_D matherr
#endif
/* Struct tag of the double-precision record: __exception in C++,
   because exception is a reserved name in C++; exception otherwise.  */
# ifdef __cplusplus
#define EXC_DECL_D __exception
#else
#define EXC_DECL_D exception
#endif
extern int MATHERR_F(struct exceptionf*);
extern int MATHERR_D(struct EXC_DECL_D*);
extern int matherrl(struct exceptionl*);
/* Set these appropriately to make thread Safe */
/* Each macro expands to a bare assignment to errno.  No <errno.h> include
   is visible in this header, so the file that uses these macros has to
   provide errno, ERANGE and EDOM itself.  */
#define ERRNO_RANGE errno = ERANGE
#define ERRNO_DOMAIN errno = EDOM
// Add code to support _LIB_VERSION
// The type and the variable are declared here only when _LIBC is not defined.
#ifndef _LIBC
typedef enum
{
_IEEE_ = -1, // IEEE-like behavior
_SVID_, // SysV, Rel. 4 behavior
_XOPEN_, // Unix98
_POSIX_, // Posix
_ISOC_ // ISO C9X
} _LIB_VERSION_TYPE;
extern _LIB_VERSION_TYPE _LIB_VERSION;
#endif
// _LIB_VERSION is a run-time variable and may affect the
// floating-point behavior of the libm functions
#endif /* ASSEMBLER */
/* Support for compatible assembler handling. */
/* L(name) forms the label .Lname.  It is defined here only when building
   with _LIBC and no definition of L exists yet.  */
#if !defined L && defined _LIBC
#define L(name) .L##name
#endif
/* When __ELF__ is defined these emit .size and .type for a symbol;
   otherwise they expand to nothing.  */
#ifdef __ELF__
#define ASM_SIZE_DIRECTIVE(name) .size name,.-name
#define ASM_TYPE_DIRECTIVE(name,T) .type name,T
#else
#define ASM_SIZE_DIRECTIVE(name)
#define ASM_TYPE_DIRECTIVE(name,T)
#endif

3319
sysdeps/ia64/fpu/libm_tan.S Normal file

File diff suppressed because it is too large Load Diff

953
sysdeps/ia64/fpu/s_atan.S Normal file
View File

@ -0,0 +1,953 @@
.file "atan.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00: Initial version
// 4/13/00: Improved speed
// 4/19/00: Removed the qualifying predicate from the fmerge.s that
// takes the absolute value.
// 6/16/00: Reassigned FP registers to eliminate stalls on loads
// 8/30/00: Saved 5 cycles in main path by rearranging large argument logic
// and delaying use of result of fcmp in load by 1 group
//
// API
//==============================================================
// double atan( double x);
//
// Overview of operation
//==============================================================
// atan(x) = sign(X)pi/2 - atan(1/x)
//
// We have two paths: |x| > 1 and |x| <= 1
//
// |x| > 1
// ==========================================
//
// c = frcpa(x) which is approximately 1/x
//
// xc = 1- B
// B = 1-xc
//
// Approximate 1/(1-B)^k by a polynomial in B, poly(B)
// k is 45.
//
// poly(B) = 1 + r1 B + r2 B^2 + ...+ r10 B^10
//
// c^k = (1-B)^k/x^k
// c^k/(1-B)^k = 1/x^k
// c^k poly(B) = 1/x^k
// poly(x) = series(atan(1/x)) = 1/x - 1/3x^3 + 1/5x^5 - 1/7x^7 .... + 1/45 x^45
// = 1/x^45 ( x^44 - x^42/3 + x^40/5 - x^38/7 ... +1)
// = 1/x^45 ( y^22 - y^21/3 + y^20/5 - y^19/7 ... +1)
//
// = c^45 poly(B) poly(x)
// = c^45 r(B) q(y)
// q(y) = q0 + q1 y + q2 y^2 + ... + q22 y^22
// where q22 is 1.0
// atan(x) = sign(X)pi/2 - c^45 r(B) q(y)
// |x| <= 1
// ==========================================
// poly(x) = series(atan(x)) = x - x^3/3 + x^5/5 + .....
// poly(x) = series(atan(x)) = x + x^3(- 1/3 + x^2/5 + ..... + x^44/47)
// poly(x) = series(atan(x)) = x + x^3(p0 + x^2/5 + ..... + x^44/47)
// poly(x) = series(atan(x)) = x + x^3(p0 + y/5 + ..... + y^22/47)
// where p0 is about -1/3.
// atan(x) = poly(x)
#include "libm_support.h"
// Special Values
//==============================================================
// atan(QNAN) = QNAN
// atan(SNAN) = quieted SNAN
// atan(+-inf) = +- pi/2
// atan(+-0) = +-0
// Registers used
//==============================================================
// predicate registers used:
// p6 -> p11
// floating-point registers used:
// f8 (input and result), f10 -> f14, f32 -> f127
// general registers used
// r14 -> r20
// Assembly macros
//==============================================================
// Symbolic names for the registers used by atan.
// Every name is equated exactly once; a name that is equated twice silently
// takes the later value, which hides the register that is really in use.
atan_Pi_by_2 = f32
atan_S_PI = f33
atan_ABS_f8 = f34
// R coefficients (used on the |x| > 1 path)
atan_R0 = f35
atan_R1 = f36
atan_R2 = f37
atan_R3 = f38
atan_R4 = f39
atan_R5 = f40
atan_R6 = f41
atan_R7 = f42
atan_R8 = f43
atan_R9 = f44
atan_R10 = f45
// Q coefficients (used on the |x| > 1 path)
atan_Q0 = f46
atan_Q1 = f47
atan_Q2 = f48
atan_Q3 = f49
atan_Q4 = f50
atan_Q5 = f51
atan_Q6 = f52
atan_Q7 = f53
atan_Q8 = f54
atan_Q9 = f55
atan_Q10 = f56
atan_Q11 = f57
atan_Q12 = f58
atan_Q13 = f59
atan_Q14 = f60
atan_Q15 = f61
atan_Q16 = f62
atan_Q17 = f63
atan_Q18 = f64
atan_Q19 = f65
atan_Q20 = f66
atan_Q21 = f67
atan_Q22 = f68
// P and Q constants are mutually exclusive
// so they can share macro definitions
// (P coefficients are used on the |x| <= 1 path).
// Note that P10, P11 and P21 share f103, f114 and f99 with the
// temporaries G2, Z6 and V2 defined below.
atan_P0 = f46
atan_P1 = f47
atan_P2 = f48
atan_P3 = f49
atan_P4 = f10
atan_P5 = f11
atan_P6 = f12
atan_P7 = f13
atan_P10 = f103
atan_P11 = f114
atan_P12 = f58
atan_P13 = f59
atan_P14 = f60
atan_P15 = f61
atan_P16 = f62
atan_P17 = f63
atan_P18 = f64
atan_P19 = f65
atan_P20 = f14
atan_P21 = f99
atan_P22 = f68
// end of P constant macros
// Temporaries of the polynomial evaluation.
// f72 is not assigned to any name.
atan_C = f69
atan_Y = f70
atan_B = f71
atan_V11 = f73
atan_V12 = f74
atan_V7 = f75
atan_V8 = f76
atan_W13 = f77
atan_W11 = f78
atan_V3 = f79
atan_V4 = f80
atan_G11 = f81
atan_G12 = f82
atan_G7 = f83
atan_G8 = f84
atan_Z1 = f85
atan_W7 = f86
atan_G3 = f87
atan_W8 = f88
atan_V9 = f89
atan_V10 = f90
atan_G10 = f91
atan_W3 = f92
atan_G4 = f93
atan_G9 = f94
atan_G6 = f95
atan_W4 = f96
atan_Z2 = f97
atan_V6 = f98
atan_V2 = f99
atan_W6 = f100
atan_W10 = f101
atan_Y3 = f102
atan_G2 = f103
atan_Y8 = f104
atan_G5 = f105
atan_Z3 = f106
atan_Z4 = f107
atan_W2 = f108
atan_V5 = f109
atan_W5 = f110
atan_G1 = f111
atan_Y11 = f112
atan_Z5 = f113
atan_Z6 = f114
atan_V1 = f115
atan_W1 = f116
atan_Z7 = f117
atan_Q = f118
// The only definition of atan_Z
atan_Z = f119
// NOTE(review): atan_abs_f8 differs from atan_ABS_f8 only by case and is
// not referenced by the atan code in this file.
atan_abs_f8 = f120
atan_V13 = f121
atan_Xcub = f122
atan_Y12 = f123
atan_P = f124
atan_NORM_f8 = f125
atan_P8 = f126
atan_P9 = f127
// General registers: table addresses, the large-argument threshold and
// the exponent handling of the input.
atan_GR_AD_R = r14
atan_GR_AD_Q = r15
atan_GR_AD_P = r16
atan_GR_10172 = r17
atan_GR_exp_f8 = r18
atan_GR_signexp_f8 = r19
atan_GR_exp_mask = r20
/////////////////////////////////////////////////////////////
// Constant tables for atan.
// Every entry occupies 16 bytes: a 64-bit significand followed by a word
// holding sign and exponent.  atan reads the entries with ldfe and a
// post-increment of 16, so the entries are stored in the order in which
// the code loads them, not in coefficient order.
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
// R coefficients (|x| > 1 path); the last entry is pi/2.
double_atan_constants_R:
ASM_TYPE_DIRECTIVE(double_atan_constants_R,@object)
data8 0xB36B46B9C5443CED, 0x0000401C //R8
data8 0x842633E0D126261F, 0x0000401F //R9
data8 0xBE04FFFFFFFF46E0, 0x00004010 //R4
data8 0xE8C62000244D66E2, 0x00004013 //R5
data8 0xF2790C001E3789B3, 0x00004016 //R6
data8 0xDCD2CCF97D7C764F, 0x00004019 //R7
data8 0xB40000000000000B, 0x00004004 //R1
data8 0xB265F3D38F5EE28F, 0x00004021 //R10
data8 0x8160000000000001, 0x00004009 //R2
data8 0xFD5BFFFFFFFE55CD, 0x0000400C //R3
data8 0xC90FDAA22168C235, 0x00003FFF // pi/2
ASM_SIZE_DIRECTIVE(double_atan_constants_R)
// Q coefficients (|x| > 1 path).
double_atan_constants_Q:
ASM_TYPE_DIRECTIVE(double_atan_constants_Q,@object)
data8 0xEBD602FA7761BC33, 0x00003FF9 //Q8
data8 0x8CB1CABD6A91913C, 0x0000BFFA //Q9
data8 0x84C665C37D623CD2, 0x00003FF7 //Q4
data8 0x8DE0D1673DAEA9BC, 0x0000BFF8 //Q5
data8 0xF658ADBE2C6E6FCC, 0x00003FF8 //Q6
data8 0xB56307BE1DD3FFB6, 0x0000BFF9 //Q7
data8 0xAAAAAAAAAAAA8000, 0x0000BFFD //Q21
data8 0x8000000000000000, 0x00003FFF //Q22
data8 0x924924923A9D710C, 0x0000BFFC //Q19
data8 0xCCCCCCCCCC9380E7, 0x00003FFC //Q20
data8 0xA644DC250EFA2800, 0x00003FED //Q0
data8 0x83DEAE24EEBF5E44, 0x0000BFF1 //Q1
data8 0xC758CCC64793D4EC, 0x00003FF3 //Q2
data8 0xBFDC0B54E7C89DCE, 0x0000BFF5 //Q3
data8 0x888855199D1290AF, 0x0000BFFB //Q15
data8 0x9D89D3BE514B0178, 0x00003FFB //Q16
data8 0xBA2E8B4DEC70282A, 0x0000BFFB //Q17
data8 0xE38E38DF9E9FC83B, 0x00003FFB //Q18
data8 0x9F8781CC990029D9, 0x00003FFA //Q10
data8 0xB0B39472DEBA3C79, 0x0000BFFA //Q11
data8 0xC2AFAEF8C85B0BC6, 0x00003FFA //Q12
data8 0xD780E539797525DD, 0x0000BFFA //Q13
data8 0xF0EDC449AC786DF9, 0x00003FFA //Q14
ASM_SIZE_DIRECTIVE(double_atan_constants_Q)
// P coefficients (|x| <= 1 path).
double_atan_constants_P:
ASM_TYPE_DIRECTIVE(double_atan_constants_P,@object)
data8 0xB1899EC590CDB8DF, 0x0000BFFA //P10
data8 0xA1E79850A67D59B0, 0x00003FFA //P11
data8 0x911D8B30C2A96E6D, 0x0000BFF3 //P20
data8 0xB87233C68A640706, 0x00003FF0 //P21
data8 0xD78E4B82F3C29D7A, 0x0000BFFA //P8
data8 0xC2EBE37AF932C14F, 0x00003FFA //P9
data8 0xBA2E8B94AA104DD6, 0x0000BFFB //P4
data8 0x9D89D7A640B71D38, 0x00003FFB //P5
data8 0x88887CA2CE9B2A40, 0x0000BFFB //P6
data8 0xF0F017D57A919C1E, 0x00003FFA //P7
data8 0xD0D635F230C80E06, 0x0000BFF8 //P16
data8 0xE847BECA7209B479, 0x00003FF7 //P17
data8 0xD14C6A2AAE0D5B07, 0x0000BFF6 //P18
data8 0x915F612A5C469117, 0x00003FF5 //P19
data8 0x921EDE5FD0DBBBE2, 0x0000BFFA //P12
data8 0xFFD303C2C8535445, 0x00003FF9 //P13
data8 0xD30DF50E295386F7, 0x0000BFF9 //P14
data8 0x9E81F2B1BBD210A8, 0x00003FF9 //P15
data8 0xAAAAAAAAAAAAA800, 0x0000BFFD //P0
data8 0xCCCCCCCCCCC7D476, 0x00003FFC //P1
data8 0x9249249247838066, 0x0000BFFC //P2
data8 0xE38E38E302290D68, 0x00003FFB //P3
data8 0xDF7F0A816F7E5025, 0x0000BFEC //P22
ASM_SIZE_DIRECTIVE(double_atan_constants_P)
// double atan(double x)
//
// Input x in f8, result in f8.  Three exits:
//   - x is a NaN or +-0:            return x*1+0 (early exit)
//   - exponent of x >= 0x10172:     return pi/2 with the sign of x
//   - otherwise one of the two polynomial paths, selected by
//     p6 (|x| <= 1) or p7 (|x| > 1)
// The coefficient loads of both paths are interleaved with the arithmetic;
// the order of the loads has to match the layout of the constant tables.
.align 32
.global atan#
////////////////////////////////////////////////////////
.section .text
.proc atan#
.align 32
atan:
// Fetch the addresses of the P and Q tables from the linkage table.
// atan_ABS_f8 = |x| (sign taken from f0)
{ .mmf
(p0) addl atan_GR_AD_P = @ltoff(double_atan_constants_P), gp
(p0) addl atan_GR_AD_Q = @ltoff(double_atan_constants_Q), gp
(p0) fmerge.s atan_ABS_f8 = f0,f8
}
;;
// atan_C = frcpa(x), approximately 1/x
{ .mmf
ld8 atan_GR_AD_P = [atan_GR_AD_P]
ld8 atan_GR_AD_Q = [atan_GR_AD_Q]
(p0) frcpa.s1 atan_C,p8 = f1,f8
}
;;
// atan_Y = x^2
{ .mmf
(p0) addl atan_GR_AD_R = @ltoff(double_atan_constants_R), gp
(p0) addl atan_GR_exp_mask = 0x1ffff, r0
(p0) fma.s1 atan_Y = f8,f8,f0
}
;;
// This fnorm takes faults or sets fault flags
// atan_GR_10172 is the exponent threshold of the large-argument exit
{ .mmf
(p0) mov atan_GR_10172 = 0x10172
ld8 atan_GR_AD_R = [atan_GR_AD_R]
(p0) fnorm atan_NORM_f8 = f8
}
;;
// qnan snan inf norm unorm 0 -+
// 1 1 0 0 0 1 11
// c 7
// p9 set if we have a NAN or +-0
{ .mmf
(p0) ldfe atan_Q8 = [atan_GR_AD_Q],16
(p0) ldfe atan_P10 = [atan_GR_AD_P],16
(p0) fclass.m.unc p9, p0 = f8, 0xc7
}
;;
{ .mmi
(p0) ldfe atan_Q9 = [atan_GR_AD_Q],16
(p0) ldfe atan_P11 = [atan_GR_AD_P],16
nop.i 999
}
;;
// For a NAN or +-0 the result is x*1+0 computed in double precision
{ .mmf
(p0) ldfe atan_Q4 = [atan_GR_AD_Q],16
(p0) ldfe atan_P20 = [atan_GR_AD_P],16
(p9) fma.d.s0 f8 = f8,f1,f0
;;
}
// Exit if we have a NAN or +-0
{ .mmb
(p0) ldfe atan_Q5 = [atan_GR_AD_Q],16
(p0) ldfe atan_P21 = [atan_GR_AD_P],16
(p9) br.ret.spnt b0
;;
}
// p6 is TRUE if |x| <= 1
// p7 is TRUE if |x| > 1
{ .mmf
(p0) ldfe atan_Q6 = [atan_GR_AD_Q],16
(p0) ldfe atan_P8 = [atan_GR_AD_P],16
(p0) fcmp.le.unc p6,p7 = atan_ABS_f8, f1
;;
}
// atan_Z = c^2
{ .mfi
(p0) ldfe atan_Q7 = [atan_GR_AD_Q],16
(p0) fma.s1 atan_Z = atan_C, atan_C, f0
nop.i 999
}
// atan_B = 1 - c*x
{ .mfi
(p0) ldfe atan_P9 = [atan_GR_AD_P],16
(p0) fnma.s1 atan_B = atan_C,f8, f1
nop.i 999 ;;
}
// atan_V12 = y^2
{ .mfi
(p0) ldfe atan_Q21 = [atan_GR_AD_Q],16
(p0) fma.s1 atan_V12 = atan_Y, atan_Y, f0
nop.i 999
}
// atan_Xcub = x^3
{ .mfi
(p0) ldfe atan_P4 = [atan_GR_AD_P],16
(p0) fma.s1 atan_Xcub = f8, atan_Y , f0
nop.i 999
;;
}
// From here on the loads are predicated on the selected path.
// p8 becomes a copy of p6 and (in the next bundle) p9 a copy of p7; the
// copies keep their value when p6 and p7 are cleared for large arguments.
{ .mmi
(p7) ldfe atan_Q22 = [atan_GR_AD_Q],16
(p6) ldfe atan_P5 = [atan_GR_AD_P],16
(p6) cmp.eq.unc p8,p0 = r0,r0
;;
}
{ .mmi
(p7) ldfe atan_Q19 = [atan_GR_AD_Q],16
(p6) ldfe atan_P6 = [atan_GR_AD_P],16
(p7) cmp.eq.unc p9,p0 = r0,r0
;;
}
{ .mmi
(p7) ldfe atan_Q20 = [atan_GR_AD_Q],16
(p6) ldfe atan_P7 = [atan_GR_AD_P],16
nop.i 999
;;
}
{ .mfi
(p7) ldfe atan_Q0 = [atan_GR_AD_Q],16
(p6) fma.s1 atan_V13 = atan_Y, atan_P11, atan_P10
nop.i 999
}
{ .mfi
(p6) ldfe atan_P16 = [atan_GR_AD_P],16
(p7) fma.s1 atan_V11 = atan_Y, atan_Q9, atan_Q8
nop.i 999 ;;
}
// atan_G12 = B^2
{ .mfi
(p7) ldfe atan_Q1 = [atan_GR_AD_Q],16
(p7) fma.s1 atan_G12 = atan_B, atan_B, f0
nop.i 999
}
// atan_V9 = y^4
{ .mfi
(p6) ldfe atan_P17 = [atan_GR_AD_P],16
(p0) fma.s1 atan_V9 = atan_V12, atan_V12, f0
nop.i 999 ;;
}
{ .mfi
(p7) ldfe atan_Q2 = [atan_GR_AD_Q],16
(p6) fma.s1 atan_W11 = atan_Y, atan_P21, atan_P20
nop.i 999
}
{ .mfi
(p6) ldfe atan_P18 = [atan_GR_AD_P],16
(p7) fma.s1 atan_V7 = atan_Y, atan_Q5, atan_Q4
nop.i 999 ;;
}
// atan_Z1 = c^4
{ .mfi
(p7) ldfe atan_Q3 = [atan_GR_AD_Q],16
(p7) fma.s1 atan_Z1 = atan_Z, atan_Z, f0
nop.i 999
}
// atan_Y3 = y^3
{ .mfi
(p6) ldfe atan_P19 = [atan_GR_AD_P],16
(p7) fma.s1 atan_Y3 = atan_Y , atan_V12, f0
nop.i 999 ;;
}
{ .mfi
(p7) ldfe atan_R8 = [atan_GR_AD_R],16
(p6) fma.s1 atan_V11 = atan_Y, atan_P9, atan_P8
nop.i 999
}
{ .mfi
(p6) ldfe atan_P12 = [atan_GR_AD_P],16
(p7) fma.s1 atan_V8 = atan_Y, atan_Q7, atan_Q6
nop.i 999 ;;
}
{ .mmi
(p7) ldfe atan_R9 = [atan_GR_AD_R],16
(p6) ldfe atan_P13 = [atan_GR_AD_P],16
nop.i 999
;;
}
{ .mfi
(p7) ldfe atan_R4 = [atan_GR_AD_R],16
(p6) fma.s1 atan_V7 = atan_Y, atan_P5, atan_P4
nop.i 999
}
{ .mfi
(p6) ldfe atan_P14 = [atan_GR_AD_P],16
(p7) fma.s1 atan_W13 = atan_Y, atan_Q22, atan_Q21
nop.i 999 ;;
}
// y^8, kept in atan_Y12 on the p6 path and in atan_Y8 on the p7 path
{ .mfi
(p7) ldfe atan_R5 = [atan_GR_AD_R],16
(p6) fma.s1 atan_Y12 = atan_V9 , atan_V9 , f0
nop.i 999
}
{ .mfi
(p6) ldfe atan_P15 = [atan_GR_AD_P],16
(p7) fma.s1 atan_Y8 = atan_V9 , atan_V9 , f0
nop.i 999 ;;
}
{ .mfi
(p7) ldfe atan_R6 = [atan_GR_AD_R],16
(p6) fma.s1 atan_V8 = atan_Y, atan_P7, atan_P6
nop.i 999
}
{ .mfi
(p6) ldfe atan_P0 = [atan_GR_AD_P],16
(p7) fma.s1 atan_W11 = atan_Y, atan_Q20, atan_Q19
nop.i 999 ;;
}
// atan_Z2 = c^8
{ .mfi
(p7) ldfe atan_R7 = [atan_GR_AD_R],16
(p7) fma.s1 atan_Z2 = atan_Z1 , atan_Z1, f0
nop.i 999
}
{ .mfi
(p6) ldfe atan_P1 = [atan_GR_AD_P],16
(p6) fma.s1 atan_V10 = atan_V12, atan_V13, atan_V11
nop.i 999 ;;
}
{ .mfi
(p7) ldfe atan_Q15 = [atan_GR_AD_Q],16
(p6) fma.s1 atan_W7 = atan_Y, atan_P17, atan_P16
nop.i 999
}
{ .mfi
(p6) ldfe atan_P2 = [atan_GR_AD_P],16
(p7) fma.s1 atan_V3 = atan_Y, atan_Q1 , atan_Q0
nop.i 999 ;;
}
// atan_G9 = B^4
{ .mfi
(p7) ldfe atan_Q16 = [atan_GR_AD_Q],16
(p7) fma.s1 atan_G9 = atan_G12, atan_G12, f0
nop.i 999
}
{ .mfi
(p6) ldfe atan_P3 = [atan_GR_AD_P],16
(p7) fma.s1 atan_V6 = atan_V12, atan_V8, atan_V7
nop.i 999 ;;
}
{ .mfi
(p7) ldfe atan_R1 = [atan_GR_AD_R],16
(p6) fma.s1 atan_W8 = atan_Y, atan_P19, atan_P18
nop.i 999
}
{ .mfi
(p6) ldfe atan_P22 = [atan_GR_AD_P],16
(p7) fma.s1 atan_V4 = atan_Y, atan_Q3 , atan_Q2
nop.i 999 ;;
}
// Get signexp of the normalized input for the large-argument test.
// atan_Y11 = y^11
{ .mfi
getf.exp atan_GR_signexp_f8 = atan_NORM_f8
(p7) fma.s1 atan_Y11 = atan_Y3 , atan_Y8 , f0
nop.i 999
}
{ .mfi
(p7) ldfe atan_Q17 = [atan_GR_AD_Q],16
(p6) fma.s1 atan_V6 = atan_V12, atan_V8, atan_V7
nop.i 999 ;;
}
{ .mfi
(p7) ldfe atan_Q18 = [atan_GR_AD_Q],16
(p6) fma.s1 atan_W3 = atan_Y, atan_P13, atan_P12
nop.i 999
}
{ .mfi
(p7) ldfe atan_R10 = [atan_GR_AD_R],16
(p7) fma.s1 atan_G11 = atan_B, atan_R9 , atan_R8
nop.i 999 ;;
}
// atan_GR_exp_f8 = exponent of x with the sign bit masked off.
// atan_Z3 = c^12
{ .mfi
(p7) ldfe atan_Q10 = [atan_GR_AD_Q],16
(p7) fma.s1 atan_Z3 = atan_Z1 , atan_Z2 , f0
(p0) and atan_GR_exp_f8 = atan_GR_signexp_f8,atan_GR_exp_mask
}
// atan_Z4 = c^16
{ .mfi
(p7) ldfe atan_R2 = [atan_GR_AD_R],16
(p7) fma.s1 atan_Z4 = atan_Z2 , atan_Z2 , f0
nop.i 999 ;;
}
{ .mfi
(p7) ldfe atan_Q11 = [atan_GR_AD_Q],16
(p6) fma.s1 atan_W4 = atan_Y, atan_P15, atan_P14
nop.i 999
}
// p11 is TRUE if the exponent of x is >= 0x10172 (large argument)
{ .mfi
(p7) ldfe atan_R3 = [atan_GR_AD_R],16
(p7) fma.s1 atan_G7 = atan_B, atan_R5 , atan_R4
(p0) cmp.le.unc p11,p0 = atan_GR_10172,atan_GR_exp_f8
;;
}
// The Q12/Q13 loads and the fma's predicated by p8 use the copies p9/p8
// of p7/p6, which are not changed by the clearing of p6 and p7 below.
// atan_S_PI is loaded from the R table (its pi/2 entry on the p7 path).
{ .mmf
(p9) ldfe atan_Q12 = [atan_GR_AD_Q],16
(p0) ldfe atan_S_PI = [atan_GR_AD_R],16
(p8) fma.s1 atan_W6 = atan_V12, atan_W8, atan_W7
;;
}
// For a large argument clear both p6 and p7 so that neither polynomial
// path writes the result
{ .mfi
(p9) ldfe atan_Q13 = [atan_GR_AD_Q],16
(p8) fma.s1 atan_V3 = atan_Y, atan_P1 , atan_P0
(p11) cmp.ne.and p6,p7 = r0,r0
}
{ .mfi
nop.m 999
(p8) fma.s1 atan_V5 = atan_V9 , atan_V10, atan_V6
nop.i 999 ;;
}
// From here on at most one of p6, p7, p11 is set
.pred.rel "mutex",p6,p7,p11
// p6 path: atan_Y12 = y^12
{ .mfi
(p7) ldfe atan_Q14 = [atan_GR_AD_Q],16
(p6) fma.s1 atan_Y12 = atan_V9 , atan_Y12, f0
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_G8 = atan_B, atan_R7 , atan_R6
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p6) fma.s1 atan_V4 = atan_Y, atan_P3 , atan_P2
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_W7 = atan_Y, atan_Q16, atan_Q15
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p6) fma.s1 atan_W10 = atan_V12, atan_P22, atan_W11
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_G3 = atan_B, atan_R1 , f1
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p6) fma.s1 atan_W2 = atan_V12, atan_W4 , atan_W3
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_V2 = atan_V12, atan_V4 , atan_V3
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_W8 = atan_Y, atan_Q18, atan_Q17
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_G10 = atan_G12, atan_R10, atan_G11
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_V10 = atan_V12, atan_Q10, atan_V11
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_G6 = atan_G12, atan_G8 , atan_G7
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p6) fma.s1 atan_V2 = atan_V12, atan_V4, atan_V3
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_G4 = atan_B , atan_R3 , atan_R2
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p6) fma.s1 atan_W5 = atan_V9 , atan_W10, atan_W6
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_W3 = atan_Y , atan_Q12, atan_Q11
nop.i 999 ;;
}
// atan_Z5 = c^28
{ .mfi
nop.m 999
(p7) fma.s1 atan_Z5 = atan_Z3 , atan_Z4 , f0
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_W10 = atan_V12, atan_W13, atan_W11
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_W4 = atan_Y , atan_Q14, atan_Q13
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_W6 = atan_V12, atan_W8, atan_W7
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_V5 = atan_V9 , atan_V10, atan_V6
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_G5 = atan_G9 , atan_G10, atan_G6
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p6) fma.s1 atan_V1 = atan_V9 , atan_V5 , atan_V2
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_G2 = atan_G12, atan_G4 , atan_G3
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p6) fma.s1 atan_W1 = atan_V9 , atan_W5 , atan_W2
nop.i 999
}
// atan_Z6 = c^17
{ .mfi
nop.m 999
(p7) fma.s1 atan_Z6 = atan_Z4 , atan_C , f0
nop.i 999 ;;
}
// Give the loaded constant the sign of x
{ .mfi
nop.m 999
(p0) fmerge.s atan_S_PI = f8, atan_S_PI
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_W5 = atan_V9 , atan_W10, atan_W6
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_W2 = atan_V12, atan_W4 , atan_W3
nop.i 999 ;;
}
// atan_G1 = r(B), the polynomial in B
{ .mfi
nop.m 999
(p7) fma.s1 atan_G1 = atan_G9 , atan_G5 , atan_G2
nop.i 999
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_V1 = atan_V9 , atan_V5 , atan_V2
nop.i 999 ;;
}
// p6 path: atan_P = polynomial in y (P0 .. P22)
{ .mfi
nop.m 999
(p6) fma.s1 atan_P = atan_Y12, atan_W1 , atan_V1
nop.i 999
}
// atan_Z7 = c^45
{ .mfi
nop.m 999
(p7) fma.s1 atan_Z7 = atan_Z5 , atan_Z6 , f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p7) fma.s1 atan_W1 = atan_V9 , atan_W5 , atan_W2
nop.i 999 ;;
}
// Large argument: result is sign(x) * pi/2
{ .mfi
nop.m 999
(p11) fma.d.s0 f8 = atan_S_PI,f1,f0
nop.i 999
}
// atan_Z = c^45 * r(B)
{ .mfi
nop.m 999
(p7) fma.s1 atan_Z = atan_G1 , atan_Z7 , f0
nop.i 999 ;;
}
// p7 path: atan_Q = q(y), the polynomial in y (Q0 .. Q22)
{ .mfi
nop.m 999
(p7) fma.s1 atan_Q = atan_Y11, atan_W1 , atan_V1
nop.i 999 ;;
}
// |x| <= 1: result = x + x^3 * P
{ .mfi
nop.m 999
(p6) fma.d.s0 f8 = atan_P , atan_Xcub , f8
nop.i 999
}
// |x| > 1: result = sign(x)*pi/2 - c^45 * r(B) * q(y)
{ .mfb
nop.m 999
(p7) fnma.d.s0 f8 = atan_Z , atan_Q , atan_S_PI
(p0) br.ret.sptk b0 ;;
}
.endp atan
ASM_SIZE_DIRECTIVE(atan)

543
sysdeps/ia64/fpu/s_atanf.S Normal file
View File

@ -0,0 +1,543 @@
.file "atanf.s"
// THIS IS NOT OPTIMIZED AND NOT OFFICIAL
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// History
//==============================================================
// ?/??/00 Initial revision
// 8/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
#include "libm_support.h"
//
// Assembly macros
//==============================================================
// Symbolic names for the registers used by atanf. These are
// assembly-time aliases only; they emit no code and no data.
//
// integer registers used
// (r33 and r34 are the two local registers requested by the alloc at
// the top of atanf; each first holds a linkage-table address and then
// the address of its coefficient table)
EXP_Addr1 = r33
EXP_Addr2 = r34
// floating point registers used
// coefficients, in the order the ldfpd pairs in atanf fill them
atanf_coeff_R4 = f32
atanf_coeff_R5 = f33
atanf_coeff_R1 = f34
atanf_coeff_R2 = f35
atanf_coeff_R3 = f36
atanf_coeff_P1 = f37
atanf_coeff_Q6 = f38
atanf_coeff_Q7 = f39
atanf_coeff_Q8 = f40
atanf_coeff_Q9 = f41
atanf_coeff_Q4 = f42
atanf_coeff_Q5 = f43
atanf_coeff_Q2 = f44
atanf_coeff_Q3 = f45
atanf_coeff_P5 = f46
atanf_coeff_P6 = f47
atanf_coeff_Q0 = f48
atanf_coeff_Q1 = f49
atanf_coeff_P7 = f50
atanf_coeff_P8 = f51
atanf_coeff_P3 = f52
atanf_coeff_P4 = f53
atanf_coeff_P9 = f54
atanf_coeff_P10 = f55
atanf_coeff_P2 = f56
atanf_piby2 = f57
// argument-derived values and powers
atanf_z = f58
atanf_b = f59
atanf_zsq = f60
atanf_sgn_x = f61
atanf_sgnx_piby2 = f62
atanf_abs_x = f63
atanf_t = f64
atanf_xcub = f65
atanf_tsq = f66
atanf_t4 = f67
atanf_x5 = f68
atanf_x6 = f69
atanf_x11 = f70
// partial polynomial sums
atanf_poly_p1 = f71
atanf_poly_p2 = f72
atanf_poly_p3 = f73
atanf_poly_p4 = f74
atanf_poly_p5 = f75
atanf_poly_q1 = f76
atanf_poly_q2 = f77
atanf_poly_q3 = f78
atanf_poly_q4 = f79
atanf_poly_q5 = f80
atanf_poly_q = f81
// atanf_poly_r1 and atanf_poly_r2 used to be assigned f81 and f82 here
// and then reassigned to f90 and f89 further down. The first pair of
// assignments was dead (every use follows the reassignment) and made
// atanf_poly_r1 look like an alias of atanf_poly_q, so only the
// effective assignments are kept. f82 is therefore unused.
atanf_poly_r3 = f83
atanf_bsq = f84
atanf_z4 = f85
atanf_z5 = f86
atanf_z8 = f87
atanf_z13 = f88
atanf_poly_r2 = f89
atanf_poly_r1 = f90
atanf_z8_bsq = f91
atanf_poly_r = f92
atanf_z21_poly_r = f93
// the result is produced in f8, the register the argument arrives in
atanf_answer = f8
// predicate registers used
// (the symbolic names are disabled; the code uses p6 and p7 directly,
// see the 8/17/00 history entry about an assembler bug)
//atanf_pred_LE1 = p6
//atanf_pred_GT1 = p7
// Coefficient tables for atanf.
// Each table is a flat array of IEEE double precision constants.
// atanf reads table 1 through EXP_Addr1 and table 2 through EXP_Addr2,
// two consecutive doubles per ldfpd, strictly in the order listed here,
// so the order of the entries must match the order of the loads.
// A trailing comment of the form "pN = qM" marks an entry that atanf
// loads into the atanf_coeff_QM register; the same constant appears a
// second time in the tables for the atanf_coeff_PN register.
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
// Table 1: loaded as (R4,R5) (R3,P1) (Q8,Q9) (Q2,Q3) (Q0,Q1) (P3,P4)
// (P2,piby2).
atanf_coeff_1_table:
ASM_TYPE_DIRECTIVE(atanf_coeff_1_table,@object)
data8 0x40c4c241be751ff2 // r4
data8 0x40e9f300c2f3070b // r5
data8 0x409babffef772075 // r3
data8 0xbfd5555512191621 // p1
data8 0x3fc9997e7afbff4e // p2 = q8
data8 0xbfd5555512191621 // p1 = q9
data8 0x3f97105b4160f86b // p8 = q2
data8 0xbfa6e10ba401393f // p7 = q3
data8 0x3f522e5d33bc9baa // p10 = q0
data8 0xbf7deaadaa336451 // p9 = q1
data8 0xbfc2473c5145ee38 // p3
data8 0x3fbc4f512b1865f5 // p4
data8 0x3fc9997e7afbff4e // p2
data8 0x3ff921fb54442d18 // pi/2
ASM_SIZE_DIRECTIVE(atanf_coeff_1_table)
// Table 2: loaded as (R1,R2) (Q6,Q7) (Q4,Q5) (P5,P6) (P7,P8) (P9,P10).
atanf_coeff_2_table:
ASM_TYPE_DIRECTIVE(atanf_coeff_2_table,@object)
data8 0x4035000000004284 // r1
data8 0x406cdffff336a59b // r2
data8 0x3fbc4f512b1865f5 // p4 = q6
data8 0xbfc2473c5145ee38 // p3 = q7
data8 0x3fb142a73d7c54e3 // p6 = q4
data8 0xbfb68eed6a8cfa32 // p5 = q5
data8 0xbfb68eed6a8cfa32 // p5
data8 0x3fb142a73d7c54e3 // p6
data8 0xbfa6e10ba401393f // p7
data8 0x3f97105b4160f86b // p8
data8 0xbf7deaadaa336451 // p9
data8 0x3f522e5d33bc9baa // p10
ASM_SIZE_DIRECTIVE(atanf_coeff_2_table)
// atanf: single precision arctangent.
//
// The argument x is read from f8 and the result is written back to f8
// (atanf_answer).
//
// x zero, infinite or NaN : handled at L(ATANF_X_INF_NAN_ZERO)
// |x| <= 1 (p6) : result = poly_p4 + x^11 * poly_p1
//                 (polynomials in t = x^2 built from the P coefficients)
// |x| >  1 (p7) : result = sign(x)*pi/2 - poly_q * (z13 * poly_r)
//                 with z = frcpa(x) approximating 1/x, b = 1 - x*z,
//                 poly_r a polynomial in b and poly_q a polynomial in t
//
// Both candidate results are computed unconditionally; only the two
// final instructions are predicated on p7 / p6.
// Integer registers r33 and r34 (EXP_Addr1, EXP_Addr2) walk the two
// coefficient tables.
.global atanf
.text
.proc atanf
.align 32
atanf:
{ .mfi
// 1 input and 2 local registers: r32 receives ar.pfs, r33 and r34 are
// the two table pointers
alloc r32 = ar.pfs,1,2,0,0
// z = reciprocal approximation of x (the predicate result is discarded)
frcpa.s1 atanf_z,p0 = f1,f8
addl EXP_Addr2 = @ltoff(atanf_coeff_2_table),gp
}
{ .mfi
addl EXP_Addr1 = @ltoff(atanf_coeff_1_table),gp
// t = x^2
fma.s1 atanf_t = f8,f8,f0
nop.i 999;;
}
{ .mfi
nop.m 999
// sgn_x = 1.0 carrying the sign of x
fmerge.s atanf_sgn_x = f8,f1
nop.i 999;;
}
{ .mfi
// fetch the address of coefficient table 1 from its linkage-table slot
ld8 EXP_Addr1 = [EXP_Addr1]
// abs_x = |x| (sign taken from f1, magnitude from x)
fmerge.s atanf_abs_x = f1,f8
nop.i 999
}
{ .mfi
// fetch the address of coefficient table 2 from its linkage-table slot
ld8 EXP_Addr2 = [EXP_Addr2]
nop.f 999
nop.i 999;;
}
{ .mfi
nop.m 999
// NOTE(review): p8 is written again by the fclass.m with mask 0xe7
// further down before anything reads it, so this result is not used.
fclass.m p8,p0 = f8,0x7 // @zero
nop.i 999;;
}
{ .mfi
nop.m 999
// NOTE(review): p9 and p10 are not read afterwards in this routine;
// presumably this compare is kept for its effect on the s0 status
// flags - TODO confirm.
fcmp.eq.unc.s0 p9,p10 = f8,f1
nop.i 999;;
}
{ .mfi
ldfpd atanf_coeff_R4,atanf_coeff_R5 = [EXP_Addr1],16
// b = 1 - x*z
fnma.s1 atanf_b = f8,atanf_z,f1
nop.i 999
}
{ .mfi
ldfpd atanf_coeff_R1,atanf_coeff_R2 = [EXP_Addr2],16
// zsq = z^2
fma.s1 atanf_zsq = atanf_z,atanf_z,f0
nop.i 999;;
}
{ .mfi
ldfpd atanf_coeff_R3,atanf_coeff_P1 = [EXP_Addr1],16
// xcub = x^3
fma.s1 atanf_xcub = f8,atanf_t,f0
nop.i 999
}
{ .mfi
ldfpd atanf_coeff_Q6,atanf_coeff_Q7 = [EXP_Addr2],16
// tsq = t^2 = x^4
fma.s1 atanf_tsq = atanf_t,atanf_t,f0
nop.i 999;;
}
{ .mfi
ldfpd atanf_coeff_Q8,atanf_coeff_Q9 = [EXP_Addr1],16
// p6 = (|x| <= 1), p7 = the complement; they select the final result
// fcmp.le.s1 atanf_pred_LE1,atanf_pred_GT1 = atanf_abs_x,f1
fcmp.le.s1 p6,p7 = atanf_abs_x,f1
nop.i 999
}
{ .mfi
ldfpd atanf_coeff_Q4,atanf_coeff_Q5 = [EXP_Addr2],16
nop.f 999
nop.i 999;;
}
{ .mfi
ldfpd atanf_coeff_Q2,atanf_coeff_Q3 = [EXP_Addr1],16
// p8 = x is zero, infinite or NaN; tested by the branch below
fclass.m p8,p0 = f8,0xe7 // @inf|@qnan|@snan|@zero
nop.i 999
}
{ .mfi
ldfpd atanf_coeff_P5,atanf_coeff_P6 = [EXP_Addr2],16
nop.f 999
nop.i 999;;
}
{ .mfi
ldfpd atanf_coeff_Q0,atanf_coeff_Q1 = [EXP_Addr1],16
nop.f 999
nop.i 999
}
{ .mfi
ldfpd atanf_coeff_P7,atanf_coeff_P8 = [EXP_Addr2],16
nop.f 999
nop.i 999;;
}
{ .mfi
ldfpd atanf_coeff_P3,atanf_coeff_P4 = [EXP_Addr1],16
// bsq = b^2
fma.s1 atanf_bsq = atanf_b,atanf_b,f0
nop.i 999
}
{ .mfi
// last pair of table 2, so no post-increment
ldfpd atanf_coeff_P9,atanf_coeff_P10 = [EXP_Addr2]
// z4 = z^4
fma.s1 atanf_z4 = atanf_zsq,atanf_zsq,f0
nop.i 999;;
}
{ .mfi
// last pair of table 1; piby2 is loaded before the special-case branch
// below, which needs it for infinite x
ldfpd atanf_coeff_P2,atanf_piby2 = [EXP_Addr1]
// x6 = x^6
fma.s1 atanf_x6 = atanf_t,atanf_tsq,f0
nop.i 999
}
{ .mfi
nop.m 999
// t4 = t^4 = x^8
fma.s1 atanf_t4 = atanf_tsq,atanf_tsq,f0
nop.i 999;;
}
{ .mfb
nop.m 999
// x5 = x^5
fma.s1 atanf_x5 = atanf_t,atanf_xcub,f0
// zero, infinity and NaN leave the main path here
(p8) br.cond.spnt L(ATANF_X_INF_NAN_ZERO)
}
;;
{ .mfi
nop.m 999
// poly_r1 = 1 + b*R1
fma.s1 atanf_poly_r1 = atanf_b,atanf_coeff_R1,f1
nop.i 999
}
{ .mfi
nop.m 999
// poly_r3 = R4 + b*R5
fma.s1 atanf_poly_r3 = atanf_b,atanf_coeff_R5,atanf_coeff_R4
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_r2 = R2 + b*R3
fma.s1 atanf_poly_r2 = atanf_b,atanf_coeff_R3,atanf_coeff_R2
nop.i 999
}
{ .mfi
nop.m 999
// z8 = z^8
fma.s1 atanf_z8 = atanf_z4,atanf_z4,f0
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_q2 = Q4 + t*Q5
fma.s1 atanf_poly_q2 = atanf_t,atanf_coeff_Q5,atanf_coeff_Q4
nop.i 999
}
{ .mfi
nop.m 999
// poly_q3 = Q6 + t*Q7
fma.s1 atanf_poly_q3 = atanf_t,atanf_coeff_Q7,atanf_coeff_Q6
nop.i 999;;
}
{ .mfi
nop.m 999
// z5 = z^5
fma.s1 atanf_z5 = atanf_z,atanf_z4,f0
nop.i 999
}
{ .mfi
nop.m 999
// poly_q1 = Q8 + t*Q9
fma.s1 atanf_poly_q1 = atanf_t,atanf_coeff_Q9,atanf_coeff_Q8
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_q4 = Q0 + t*Q1
fma.s1 atanf_poly_q4 = atanf_t,atanf_coeff_Q1,atanf_coeff_Q0
nop.i 999
}
{ .mfi
nop.m 999
// poly_q5 = Q2 + t*Q3
fma.s1 atanf_poly_q5 = atanf_t,atanf_coeff_Q3,atanf_coeff_Q2
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_p4 = x*P1
fma.s1 atanf_poly_p4 = f8,atanf_coeff_P1,f0
nop.i 999
}
{ .mfi
nop.m 999
// poly_p5 = P3 + t*P4
fma.s1 atanf_poly_p5 = atanf_t,atanf_coeff_P4,atanf_coeff_P3
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_r1 = z^8 * (1 + b*R1)
fma.s1 atanf_poly_r1 = atanf_z8,atanf_poly_r1,f0
nop.i 999
}
{ .mfi
nop.m 999
// z8_bsq = z^8 * b^2
fma.s1 atanf_z8_bsq = atanf_z8,atanf_bsq,f0
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_q2 = poly_q2 + t^2*poly_q3
fma.s1 atanf_poly_q2 = atanf_tsq,atanf_poly_q3,atanf_poly_q2
nop.i 999
}
{ .mfi
nop.m 999
// poly_r2 = poly_r2 + b^2*poly_r3
fma.s1 atanf_poly_r2 = atanf_bsq,atanf_poly_r3,atanf_poly_r2
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_p2 = P7 + t*P8
fma.s1 atanf_poly_p2 = atanf_t,atanf_coeff_P8,atanf_coeff_P7
nop.i 999
}
{ .mfi
nop.m 999
// poly_q1 = poly_q1 + t^2
fma.s1 atanf_poly_q1 = atanf_poly_q1,f1,atanf_tsq
nop.i 999;;
}
{ .mfi
nop.m 999
// z13 = z^13
fma.s1 atanf_z13 = atanf_z5,atanf_z8,f0
nop.i 999
}
{ .mfi
nop.m 999
// poly_p1 = P9 + t*P10
fma.s1 atanf_poly_p1 = atanf_t,atanf_coeff_P10,atanf_coeff_P9
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_p4 = x + t*(x*P1) = x + P1*x^3
fma.s1 atanf_poly_p4 = atanf_t,atanf_poly_p4,f8
nop.i 999
}
{ .mfi
nop.m 999
// poly_q4 = poly_q4 + t^2*poly_q5
fma.s1 atanf_poly_q4 = atanf_tsq,atanf_poly_q5,atanf_poly_q4
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_p3 = P5 + t*P6
fma.s1 atanf_poly_p3 = atanf_t,atanf_coeff_P6,atanf_coeff_P5
nop.i 999
}
{ .mfi
nop.m 999
// poly_p5 = P2 + t*poly_p5
fma.s1 atanf_poly_p5 = atanf_t,atanf_poly_p5,atanf_coeff_P2
nop.i 999;;
}
{ .mfi
nop.m 999
// x11 = x^11
fma.s1 atanf_x11 = atanf_x5,atanf_x6,f0
nop.i 999
}
{ .mfi
nop.m 999
// poly_r = poly_r1 + (z^8*b^2)*poly_r2
fma.s1 atanf_poly_r = atanf_z8_bsq,atanf_poly_r2,atanf_poly_r1
nop.i 999;;
}
{ .mfi
nop.m 999
// sgnx_piby2 = sign(x) * pi/2 (no status-field completer is given on
// this fma, unlike the surrounding .s1 operations)
fma atanf_sgnx_piby2 = atanf_sgn_x,atanf_piby2,f0
nop.i 999
}
{ .mfi
nop.m 999
// poly_q2 = poly_q2 + t^4*poly_q1
fma.s1 atanf_poly_q2 = atanf_t4,atanf_poly_q1,atanf_poly_q2
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_p1 = poly_p2 + t^2*poly_p1
fma.s1 atanf_poly_p1 = atanf_tsq,atanf_poly_p1,atanf_poly_p2
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_p4 = poly_p4 + x^5*poly_p5
fma.s1 atanf_poly_p4 = atanf_x5,atanf_poly_p5,atanf_poly_p4
nop.i 999;;
}
{ .mfi
nop.m 999
// z21_poly_r = z^13 * poly_r
fma.s1 atanf_z21_poly_r = atanf_z13,atanf_poly_r,f0
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_q = poly_q4 + t^4*poly_q2
fma.s1 atanf_poly_q = atanf_t4,atanf_poly_q2,atanf_poly_q4
nop.i 999;;
}
{ .mfi
nop.m 999
// poly_p1 = poly_p3 + t^2*poly_p1
fma.s1 atanf_poly_p1 = atanf_tsq,atanf_poly_p1,atanf_poly_p3
nop.i 999;;
}
{ .mfi
nop.m 999
// |x| > 1: result = sign(x)*pi/2 - poly_q * z21_poly_r, rounded to single
//(atanf_pred_GT1) fnma.s atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
(p7) fnma.s atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
nop.i 999;;
}
{ .mfb
nop.m 999
// |x| <= 1: result = poly_p4 + x^11 * poly_p1, rounded to single
//(atanf_pred_LE1) fma.s atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4
(p6) fma.s atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4
br.ret.sptk b0
}
// Special arguments: x is zero, infinite or NaN.
// For infinite x the magnitude of f8 is replaced by pi/2 while the sign
// of x is kept; zero and NaN pass through unchanged. The final fnorm.s
// then rounds the value to single precision.
L(ATANF_X_INF_NAN_ZERO):
fclass.m p8,p9 = f8,0x23 // @inf
;;
(p8) fmerge.s f8 = f8, atanf_piby2
;;
fnorm.s f8 = f8
br.ret.sptk b0
.endp atanf
ASM_SIZE_DIRECTIVE(atanf)

1994
sysdeps/ia64/fpu/s_atanl.S Normal file

File diff suppressed because it is too large Load Diff

676
sysdeps/ia64/fpu/s_cbrt.S Normal file
View File

@ -0,0 +1,676 @@
.file "cbrt.asm"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang
// of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00: Initial version
// 5/19/00: New version (modified algorithm)
//
// API
//==============================================================
// double cbrt(double)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// cbrt(a) = cbrt(a y) / cbrt(y)
// = cbrt(1 - (1 - a y)) * 1/cbrt(y)
//
// where y = frcpa(a).
//
// * cbrt(1 - (1 - a y)) is approximated by a degree-5 polynomial
//
// 1 - (1/3)*r - (1/9)*r^2 - (5/81)*r^3 - (10/243)*r^4 - (22/729)*r^5
//
// in r = 1 - a y.
//
// * The values 1/cbrt(y) are stored in a table of constants T0
// to 64 bits of accuracy
//
// The table values are stored for the three possible values of e mod 3
// and are then multiplied by 2^floor(e/3), where e is the exponent of
// the input number.
// This computation is carried out in parallel with the polynomial
// evaluation:
//
// T = 2^(e/3) * T0
//===============
// input = x
// C = frcpa(x)
// r = 1 - C * x
//
// Special values
//==============================================================
// Registers used
//==============================================================
// f6-f14
// r2-r3, r23-r26, r28-r30
// p6,p7,p12
#include "libm_support.h"
// Data tables
//==============================================================
// Constants for cbrt: the polynomial coefficients C_1..C_5 immediately
// followed by T_table. cbrt relies on this adjacency: it forms the
// address poly_coeffs+32, loads C_4 and C_5 from there with a
// post-increment of 16, and uses the resulting address (poly_coeffs+48)
// as the base of T_table.
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
poly_coeffs:
ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
// C_1: 64-bit significand followed by sign/exponent word, read with
// ldfe; occupies 16 bytes
data8 0xaaaaaaaaaaaaaaab, 0x00003ffd // 1/3
// C_2, C_3: doubles read with ldfpd (approximately 1/9 and 5/81)
data8 0x3fbc71c71c71c71d, 0x3faf9add3c0ca459
// C_4, C_5: doubles read with ldfpd (approximately 10/243 and 22/729)
data8 0x3fa511e8d2b3183b, 0x3f9ee7113506ac13
ASM_SIZE_DIRECTIVE(poly_coeffs)
// T_table: 8-byte entries read with ldf8, i.e. as 64-bit significands.
// cbrt indexes it with 3*y_index + (exponent mod 3), where y_index is
// the 8 significand bits that follow the leading 1 of the argument, so
// the entries form 256 consecutive groups of three.
T_table:
ASM_TYPE_DIRECTIVE(T_table,@object)
data8 0x80155c748c374836, 0xa160019ed37fb4ae
data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9
data8 0xa1960b5966da4608, 0xcb95f333968ad59b
data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4
data8 0xcbda64292d3ffd97, 0x8096b586974669b1
data8 0xa202f97995b69c0d, 0xcc1f3184af961596
data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d
data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3
data8 0xa26a2582012f6e17, 0xcca12e9831fc6402
data8 0x81149add67c2d208, 0xa2a197e5d10465cb
data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a
data8 0xa2d25a532efefbc8, 0xcd24794726477ea5
data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8
data8 0xcd6b096a0b70ee87, 0x818ed973b811135e
data8 0xa33b9c9b59879e24, 0xcda9177738b15a90
data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21
data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a
data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906
data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574
data8 0xce6e0be0cd551a61, 0x823880f78e70b805
data8 0xa4115ce30548bc15, 0xceb666b2c347d1de
data8 0x826097a62a8e5200, 0xa443df0e53df577a
data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf
data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765
data8 0x82b15a10c5371624, 0xa4a99f303bc7def5
data8 0xcf763c47ee869f00, 0x82da06a527b18937
data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785
data8 0x8302e60b635ab394, 0xa5105d46152c938a
data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e
data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6
data8 0x83553f0ce00e276b, 0xa5781dad3e54d899
data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a
data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21
data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc
data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca
data8 0xa60e1e1a2de14745, 0xd1376458e34b037e
data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658
data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8
data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715
data8 0x844510461ff14209, 0xa6a6444aa0243c0b
data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2
data8 0xa6dc094d10f25792, 0xd23ad555f773f059
data8 0x84947e18234f3294, 0xa70a574cc02bba69
data8 0xd2752c7039a5bf73, 0x84bf92755825045a
data8 0xa7409e2af9549084, 0xd2b98ee008c06b59
data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b
data8 0xd2f4735ffd700280, 0x8509ef44b86f20be
data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1
data8 0x85359d5d91768427, 0xa7d5579ae5164b85
data8 0xd374f0666c75d51c, 0x855b3bd5b7384357
data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1
data8 0x858104f0c415f79a, 0xa8345895e5250a5a
data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864
data8 0xa8642a122b44ef0b, 0xd428e23874f13a17
data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b
data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3
data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420
data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e
data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852
data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3
data8 0x866dca21754096b5, 0xa95ea86b75cc2c20
data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37
data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13
data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba
data8 0xd5e0a45015350a7e, 0x86dccd74fce79610
data8 0xa9ea8686f556f645, 0xd614b539c6194104
data8 0x870453c845acf90f, 0xaa1c52d17906bb19
data8 0xd6537310e224283f, 0x872c089a1e90342c
data8 0xaa4e59b046dab887, 0xd6927ab62244c917
data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b
data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4
data8 0xaab319102f3f9b33, 0xd71169cea98fdded
data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274
data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a
data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317
data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e
data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc
data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1
data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e
data8 0xd83e38838648d815, 0x885bc559e5e1c081
data8 0xabcd090db7ef4c3f, 0xd874a1db598b8951
data8 0x887e2ee392bb7a93, 0xabf864602d7c323d
data8 0xd8ab42205b80edaf, 0x88a7a8587e404257
data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965
data8 0x88ca5eda67594784, 0xac5861d4aa441f0f
data8 0xd92432bd5a173685, 0x88f4356166bd590e
data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e
data8 0x89173a0acf5ce026, 0xacb93703ff51571e
data8 0xd99e3327cf89574e, 0x893a62a098b6a57b
data8 0xace5830ad0c3f14b, 0xd9d602b19b100466
data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2
data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5
data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce
data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb
data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0
data8 0xada184a47e9c7613, 0xdac2e230b91c3f84
data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff
data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29
data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced
data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a
data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835
data8 0xae5794122b638df9, 0xdba843ded7151ea1
data8 0x8a849aba14274764, 0xae858fda8137ae0a
data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b
data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68
data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920
data8 0xdc56cacda82d0cd5, 0x8af301688ab33558
data8 0xaf10a899d3235fe7, 0xdc917398f2797814
data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4
data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c
data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2
data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b
data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de
data8 0xafc35ce063eb3787, 0xdd729ad01c69114d
data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d
data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335
data8 0xb022923b148e05c5, 0xddea8f50a51c69b1
data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b
data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9
data8 0xb078f3ab1d701c65, 0xde576480262399bc
data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31
data8 0xde943789645933c8, 0x8c5dc4c4f7706032
data8 0xb0d9b624d62ec856, 0xded14d58139a28af
data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1
data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716
data8 0xb131821882f5540a, 0xdf3feb44d723a713
data8 0x8cc29907fb951294, 0xb158bf8e4cb04055
data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8
data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8
data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4
data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee
data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52
data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec
data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515
data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac
data8 0x8d97af6352739cb7, 0xb26538b2db8420dc
data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f
data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d
data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16
data8 0xe1362890eb663139, 0x8e00197e1e7c88fe
data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa
data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f
data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2
data8 0xb33a7d6268109ebe, 0xe1d050901c531e85
data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55
data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e
data8 0xb3971e9b39264023, 0xe2450559b4d80b6d
data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a
data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad
data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b
data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d
data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff
data8 0xb43da8e9d163e1af, 0xe316d93615862714
data8 0x8f385c95d696b817, 0xb47233773b84d425
data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3
data8 0xb49c6825430fe730, 0xe38e38e38e38e38e
data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf
data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38
data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e
data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168
data8 0xe42eeca17c62886c, 0x8fe117499e356095
data8 0xb546c9616087ab9c, 0xe464e32943446305
data8 0x90033624aa685f8d, 0xb571c69bdffd9a70
data8 0xe49b0ce15747a8a2, 0x9025757495f36b86
data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4
data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7
data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab
data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3
data8 0x90844ca7211032a7, 0xb6146a9a1bc47819
data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d
data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a
data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2
data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4
data8 0xb6982f048c999a56, 0xe60dfb2005c192e9
data8 0x9110021e7b516f0a, 0xb6c47044075b4142
data8 0xe645bd1544c7ea51, 0x912a708a39be9075
data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0
data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2
data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5
data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4
data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7
data8 0xe70a9136a7403039, 0x91afbc299ed0295d
data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589
data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02
data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92
data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a
data8 0x9212b5fcac537c19, 0xb80a6226904045e2
data8 0xe7e067453317ed2b, 0x9236f6b256923fcf
data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5
data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8
data8 0xe8454236bfaeca14, 0x9276bef031e6eb79
data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e
data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d
data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3
data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7
data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a
data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f
data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3
data8 0x931379a403be5c16, 0xb94de2d841a184c2
data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34
data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e
data8 0x9354c71412c69486, 0xb9a0297f172665e3
data8 0xe9dfa70b745ac1b4, 0x937a4c273907e262
data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38
data8 0x93968919f6e7975d, 0xb9f3030951267208
data8 0xea480963fd394197, 0x93bc516fdd4680c9
data8 0xba229d6a618e7c59, 0xea84034425f27484
data8 0x93d8c123d9be59b2, 0xba467144459f9855
data8 0xeab12713138dd1cc, 0x93f546c955e60076
data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b
data8 0x941b70a65879079f, 0xba9a76056b67ee7a
data8 0xeb1b0268343b121b, 0x943829f337410591
data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14
data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b
data8 0xeb765721e85f03d0, 0x947b86b57f5842ed
data8 0xbb1385a23be24e57, 0xebb389645f222f62
data8 0x94988aeb23470f86, 0xbb3814975e17c680
data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a
data8 0xbb5cc031009bf467, 0xec0fcc9321024509
data8 0x94d2d7a9170d8b42, 0xbb81889680024764
data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019
data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7
data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463
data8 0xecaad5278824e453, 0x9534cefa625fcb3a
data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77
data8 0x955265405c491a25, 0xbc223d88cfc88eee
data8 0xed089ed5dcd99446, 0x9570130c1f9bb857
data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c
data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a
data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c
data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6
data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d
data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684
data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903
data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306
data8 0xee357ead791fc670, 0x962e350575b409c5
data8 0xbd372f8598620f19, 0xee658cb3c134a463
data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e
data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d
data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f
data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d
data8 0xeef6a0da64a014ac, 0x96a8426705198795
data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811
data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15
data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d
data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6
data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371
data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0
data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607
data8 0x97430782be323831, 0xbe93f5b41d047cf7
data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf
data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d
data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c
data8 0xf0805c944d827454, 0x97a117ffd0f48e46
data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb
data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c
data8 0xf0e46442e76f6569, 0x97e0505a8637a036
data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896
data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4
data8 0xf1383fa9e9b5b381, 0x9815503365914a9d
data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b
data8 0x98354085054fd204, 0xbfc52428bec6e72f
data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902
data8 0xbfed838fddab024b, 0xf1d0593311db1757
data8 0x987571fffb7f94f6, 0xc016050c0420981a
data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23
data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f
data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce
data8 0xf258d095e465cc35, 0x98cbb2d196bd713d
data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34
data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4
data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344
data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e
data8 0x9922b8218160967a, 0xc0f054ca33eb3437
data8 0xf31670135ab9cc0f, 0x99438d686f75779d
data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb
data8 0x99647eea131fa20b, 0xc1433453de2033ff
data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0
data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6
data8 0x999ba5f14f8add02, 0xc188b130431d80e6
data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae
data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a
data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734
data8 0xf42e718b90c8bc16, 0x99f48a669c74c09e
data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00
data8 0x9a16154eb445c873, 0xc222f35a87b415ba
data8 0xf498c1076015faf8, 0x9a2c822ec198d667
data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5
data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01
data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e
data8 0xc2945aac24daaf6e, 0xf527a232cf6be334
data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66
data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958
data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4
data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff
data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d
data8 0xc323938449a2587e, 0xf5dc1501f324a812
data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20
data8 0xf6006bee86b5589e, 0x9b1b19033be35730
data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4
data8 0x9b3da7daf04c2892, 0xc397593adf2ba366
data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b
data8 0xc3b475b6206155d5, 0xf6929fb98225deb1
data8 0x9b77854e6c661200, 0xc3e0410243b97383
data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f
data8 0xc3fd890709833d37, 0xf6eeb177472cedae
data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06
data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4
data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1
data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1
data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503
data8 0xc490f9a94695ba14, 0xf7a874b97927af44
data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390
data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02
data8 0xc4db5941007aa853, 0xf806291bacb7f7a9
data8 0x9c568656c0423def, 0xc4f938aec206291a
data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60
data8 0xc52629e899dfd622, 0xf8646bf0defb759e
data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965
data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c
data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f
data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c
data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902
data8 0xc5adf561b91e110a, 0xf90f832c2700c160
data8 0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa
data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96
data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873
data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862
data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768
data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41
data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35
data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c
data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5
data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e
data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb
data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4
data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b
data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f
data8 0xc70fc0117c641630, 0xfacd431644ce0e40
data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be
data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075
data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5
data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c
data8 0xfb576c5762024805, 0x9e6ed27594550d2e
data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040
data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d
data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055
data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893
data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f
data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154
data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f
data8 0x9ef976db07288d04, 0xc84b978847a06b87
data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25
data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08
data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4
data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca
data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e
data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232
data8 0xfd118595143ee273, 0x9f860593d42fd7f3
data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a
data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663
data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037
data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb
data8 0x9fd383731ca51db9, 0xc95e5112e721582a
data8 0xfdb5544205095a53, 0x9fed79a04fbf9423
data8 0xc97f06bb49787677, 0xfdde8a67d2613531
data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
data8 0xa07d73ba65e680af, 0xca346d07b045a876
data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
data8 0xa0b24fe89e02602f, 0xca77068257be9bab
data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
data8 0xa0e77200215909e6, 0xcab9f8122c99a101
data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
ASM_SIZE_DIRECTIVE(T_table)
// cbrt: double precision cube root.
//
// The argument a is read from f8 and the result is written back to f8.
// Arguments that are neither normal nor denormal (p12) are returned
// early as a*1+0. Otherwise:
//   y  = frcpa(a), r = 1 - a*y
//   P  = c1*r + r^2*((c2 + c3*r) + r^2*(c4 + c5*r))
//   T  = T_table entry selected by the leading significand bits and by
//        the exponent modulo 3, scaled by sign * 2^floor(exponent/3)
//   result = T - T*P
// The integer exponent arithmetic and the table lookup are interleaved
// with the floating-point polynomial evaluation.
.align 32
.global cbrt#
.section .text
.proc cbrt#
.align 32
cbrt:
{ .mfi
// get significand
getf.sig r23=f8
// will continue only for normal/denormal numbers
// (p12 = 1 when the argument is neither normalized nor unnormalized)
(p0) fclass.nm.unc p12,p0 = f8, 0x1b
// r2 = linkage-table slot holding the pointer to C_1,...,C_5
// followed by T_table
addl r2 = @ltoff(poly_coeffs), gp
}
{.mfi
// get exponent
getf.exp r24=f8
// normalize a
fma.s1 f14=f8,f1,f0
// r29=bias-((2^{12}-1)/3) -63=0xffff-0x555-0x3f=0xfa6b
mov r29=0xfa6b;;
}
{.mlx
// r25 = mask for the sign bit of the getf.exp result
mov r25=0x20000
// r28=2^63; a significand below this value has no leading 1 bit
movl r28=0x8000000000000000;;
}
{.mfb
// load start address for C_1,...,C_5 followed by T_table
ld8 r3=[r2]
// special arguments: return a*1+0 in double precision
(p12) fma.d.s0 f8=f8,f1,f0
(p12) br.ret.spnt b0
}
{.mfi
nop.m 0
// y=frcpa(a); p6 predicates all remaining floating-point work
frcpa.s0 f8,p6=f1,f8
// p7=1 if denormal input (significand smaller than 2^63)
cmp.gtu p7,p0=r28,r23;;
}
{.mmi
// get exponent of the normalized argument
(p7) getf.exp r24=f14
// get normalized significand
(p7) getf.sig r23=f14
// r28=bias-(2^{12}-1)
mov r28=0xf000;;
}
{.mii
// get r26=sign
and r26=r24,r25
// eliminate leading 1 from r23=1st table index
shl r23=r23,1
// eliminate sign from exponent (r25)
andcm r25=r24,r25;;
}
{.mib
// r2 = address of C_4 (poly_coeffs+32)
add r2=32,r3
// r23=1st table index (y_index,8 bits)
shr.u r23=r23,56
nop.b 0
}
{.mib
// load C_1
ldfe f7=[r3],16
// subtract bias-(2^{12}-1) from r25=exponent; the remaining offset
// 2^{12}-1 = 3*0x555 is a multiple of 3 and is removed again through r29
sub r25=r25,r28
nop.b 0;;
}
{.mib
// load C_2, C_3
ldfpd f9,f10=[r3]
// 1: exponent*=5; // (2^{16}-1)/3=0x5555
shladd r24=r25,2,r25
nop.b 0
}
{.mib
// load C_4, C_5; afterwards r2 = start of T_table
ldfpd f11,f12=[r2],16
// r23=3*y_index
shladd r23=r23,1,r23
nop.b 0;;
}
{.mfi
// r30=(5*expon)*16+5*expon=(0x55)*expon
shladd r30=r24,4,r24
// r=1-a*y
(p6) fnma.s1 f6=f8,f14,f1
// adjust T_table pointer by 1st index (8 bytes per entry)
shladd r2=r23,3,r2;;
}
{.mii
nop.m 0
// r24=(0x5500)*expon
shl r24=r30,8;;
// r24=(0x5555)*expon
add r24=r24,r30;;
}
{.mii
// r24=(0x5556)*expon // 0x5556=(2^{16}+2)/3
add r24=r24,r25
nop.i 0;;
// r24=floor(expon/3)
shr r24=r24,16;;
}
{.mfi
// r28=3*floor(expon/3)
shladd r28=r24,1,r24
// r^2=r*r (kept in f13)
(p6) fma.s1 f13=f6,f6,f0
// bias exponent
add r24=r29,r24;;
}
{.mfi
// get remainder of exponent/3 : r25-r28
sub r25=r25,r28
// c2+c3*r
(p6) fma.s1 f9=f10,f6,f9
// add sign to exponent
or r24=r24,r26
}
{.mfi
nop.m 0
// c4+c5*r
(p6) fma.s1 f11=f12,f6,f11
nop.i 0;;
}
{.mmi
// f14=sign*2^{exponent/3}
(p6) setf.exp f14=r24
// adjust T_table pointer by 2nd index (exponent remainder)
shladd r2=r25,3,r2
nop.i 0;;
}
{.mmi
// load T (replaces y in f8)
(p6) ldf8 f8=[r2]
nop.m 0
nop.i 0;;
}
{.mfi
nop.m 0
// (c2+c3*r)+r^2*(c4+c5*r)
(p6) fma.s1 f9=f11,f13,f9
nop.i 0
}
{.mfi
nop.m 0
// c1*r
(p6) fma.s1 f7=f7,f6,f0
nop.i 0;;
}
{.mfi
nop.m 0
// P=c1*r+r^2*[(c2+c3*r)+r^2*(c4+c5*r)]
(p6) fma.s1 f9=f9,f13,f7
nop.i 0
}
{.mfi
nop.m 0
// T'=T*(2^exp)
(p6) fma.s1 f8=f8,f14,f0
nop.i 0;;
}
{.mfb
nop.m 0
// result = T'-T'*P
(p6) fnma.d.s0 f8=f8,f9,f8
br.ret.sptk b0;;
}
.endp cbrt
ASM_SIZE_DIRECTIVE(cbrt)

655
sysdeps/ia64/fpu/s_cbrtf.S Normal file
View File

@ -0,0 +1,655 @@
.file "cbrtf.asm"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang
// of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00: Initial version
// 5/18/00: New version (modified algorithm)
//
// API
//==============================================================
// float cbrtf(float)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// cbrt(a) = cbrt(a y) / cbrt(y)
// = cbrt(1 - (1 - a y)) * 1/cbrt(y)
//
// where y = frcpa(a).
//
// * cbrt(1 - (1 - a y)) is approximated by a degree-2 polynomial
//
// 1 - (1/3)*r - (1/9)*r^2
//
// in r = 1 - a y.
//
// * The values 1/cbrt(y) are stored in a table of constants T0
// to 64 bits of accuracy
//
// The table values are stored for three exponent values and are
// then multiplied by 2^(e/3) (see the formula for T below), where e is
// the exponent of the input number.
// This computation is carried out in parallel with the polynomial
// evaluation:
//
// T = 2^(e/3) * T0
//===============
// input = x
// C = frcpa(x)
// r = 1 - C * x
//
// Special values
//==============================================================
// Registers used
//==============================================================
// f6-f15
// r2, r23-r26, r28-r30
// p6,p7,p8,p12
#include "libm_support.h"
// Data tables
//==============================================================
// Place the constant tables in .rodata when _LIBC is defined, otherwise
// in .data.
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
// poly_coeffs: the two polynomial coefficients used by cbrtf. Each entry
// occupies 16 bytes (64-bit significand followed by a 64-bit word holding
// the sign/exponent field) and is read by cbrtf with ldfe, post-incrementing
// the pointer by 16. T_table starts immediately after the second entry, so
// the pointer is left at T_table once both have been loaded.
poly_coeffs:
ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
data8 0xaaaaaaaaaaaaaaab, 0x00003ffd // 1/3
data8 0xe38e38e38e38e38e, 0x00003ffb // 1/9
ASM_SIZE_DIRECTIVE(poly_coeffs)
// T_table: 768 8-byte values, i.e. 256 consecutive triples of 24 bytes.
// cbrtf addresses a triple as T_table + 24*y_index, where y_index is the
// 8 significand bits that follow the leading 1 of the normalized input,
// and loads the three members with ldf8 as T[0][y], T[1][y], T[2][y].
// The member actually used is selected by the remainder (0, 1 or 2) of the
// input exponent divided by 3.
// Per the file header these are the 1/cbrt(y) constants; within a triple
// the members presumably differ by factors of 2^(1/3) -- TODO confirm
// against the table generator.
T_table:
ASM_TYPE_DIRECTIVE(T_table,@object)
data8 0x80155c748c374836, 0xa160019ed37fb4ae
data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9
data8 0xa1960b5966da4608, 0xcb95f333968ad59b
data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4
data8 0xcbda64292d3ffd97, 0x8096b586974669b1
data8 0xa202f97995b69c0d, 0xcc1f3184af961596
data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d
data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3
data8 0xa26a2582012f6e17, 0xcca12e9831fc6402
data8 0x81149add67c2d208, 0xa2a197e5d10465cb
data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a
data8 0xa2d25a532efefbc8, 0xcd24794726477ea5
data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8
data8 0xcd6b096a0b70ee87, 0x818ed973b811135e
data8 0xa33b9c9b59879e24, 0xcda9177738b15a90
data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21
data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a
data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906
data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574
data8 0xce6e0be0cd551a61, 0x823880f78e70b805
data8 0xa4115ce30548bc15, 0xceb666b2c347d1de
data8 0x826097a62a8e5200, 0xa443df0e53df577a
data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf
data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765
data8 0x82b15a10c5371624, 0xa4a99f303bc7def5
data8 0xcf763c47ee869f00, 0x82da06a527b18937
data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785
data8 0x8302e60b635ab394, 0xa5105d46152c938a
data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e
data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6
data8 0x83553f0ce00e276b, 0xa5781dad3e54d899
data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a
data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21
data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc
data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca
data8 0xa60e1e1a2de14745, 0xd1376458e34b037e
data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658
data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8
data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715
data8 0x844510461ff14209, 0xa6a6444aa0243c0b
data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2
data8 0xa6dc094d10f25792, 0xd23ad555f773f059
data8 0x84947e18234f3294, 0xa70a574cc02bba69
data8 0xd2752c7039a5bf73, 0x84bf92755825045a
data8 0xa7409e2af9549084, 0xd2b98ee008c06b59
data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b
data8 0xd2f4735ffd700280, 0x8509ef44b86f20be
data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1
data8 0x85359d5d91768427, 0xa7d5579ae5164b85
data8 0xd374f0666c75d51c, 0x855b3bd5b7384357
data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1
data8 0x858104f0c415f79a, 0xa8345895e5250a5a
data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864
data8 0xa8642a122b44ef0b, 0xd428e23874f13a17
data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b
data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3
data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420
data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e
data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852
data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3
data8 0x866dca21754096b5, 0xa95ea86b75cc2c20
data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37
data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13
data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba
data8 0xd5e0a45015350a7e, 0x86dccd74fce79610
data8 0xa9ea8686f556f645, 0xd614b539c6194104
data8 0x870453c845acf90f, 0xaa1c52d17906bb19
data8 0xd6537310e224283f, 0x872c089a1e90342c
data8 0xaa4e59b046dab887, 0xd6927ab62244c917
data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b
data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4
data8 0xaab319102f3f9b33, 0xd71169cea98fdded
data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274
data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a
data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317
data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e
data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc
data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1
data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e
data8 0xd83e38838648d815, 0x885bc559e5e1c081
data8 0xabcd090db7ef4c3f, 0xd874a1db598b8951
data8 0x887e2ee392bb7a93, 0xabf864602d7c323d
data8 0xd8ab42205b80edaf, 0x88a7a8587e404257
data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965
data8 0x88ca5eda67594784, 0xac5861d4aa441f0f
data8 0xd92432bd5a173685, 0x88f4356166bd590e
data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e
data8 0x89173a0acf5ce026, 0xacb93703ff51571e
data8 0xd99e3327cf89574e, 0x893a62a098b6a57b
data8 0xace5830ad0c3f14b, 0xd9d602b19b100466
data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2
data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5
data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce
data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb
data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0
data8 0xada184a47e9c7613, 0xdac2e230b91c3f84
data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff
data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29
data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced
data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a
data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835
data8 0xae5794122b638df9, 0xdba843ded7151ea1
data8 0x8a849aba14274764, 0xae858fda8137ae0a
data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b
data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68
data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920
data8 0xdc56cacda82d0cd5, 0x8af301688ab33558
data8 0xaf10a899d3235fe7, 0xdc917398f2797814
data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4
data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c
data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2
data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b
data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de
data8 0xafc35ce063eb3787, 0xdd729ad01c69114d
data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d
data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335
data8 0xb022923b148e05c5, 0xddea8f50a51c69b1
data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b
data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9
data8 0xb078f3ab1d701c65, 0xde576480262399bc
data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31
data8 0xde943789645933c8, 0x8c5dc4c4f7706032
data8 0xb0d9b624d62ec856, 0xded14d58139a28af
data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1
data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716
data8 0xb131821882f5540a, 0xdf3feb44d723a713
data8 0x8cc29907fb951294, 0xb158bf8e4cb04055
data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8
data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8
data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4
data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee
data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52
data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec
data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515
data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac
data8 0x8d97af6352739cb7, 0xb26538b2db8420dc
data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f
data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d
data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16
data8 0xe1362890eb663139, 0x8e00197e1e7c88fe
data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa
data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f
data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2
data8 0xb33a7d6268109ebe, 0xe1d050901c531e85
data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55
data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e
data8 0xb3971e9b39264023, 0xe2450559b4d80b6d
data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a
data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad
data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b
data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d
data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff
data8 0xb43da8e9d163e1af, 0xe316d93615862714
data8 0x8f385c95d696b817, 0xb47233773b84d425
data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3
data8 0xb49c6825430fe730, 0xe38e38e38e38e38e
data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf
data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38
data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e
data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168
data8 0xe42eeca17c62886c, 0x8fe117499e356095
data8 0xb546c9616087ab9c, 0xe464e32943446305
data8 0x90033624aa685f8d, 0xb571c69bdffd9a70
data8 0xe49b0ce15747a8a2, 0x9025757495f36b86
data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4
data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7
data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab
data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3
data8 0x90844ca7211032a7, 0xb6146a9a1bc47819
data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d
data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a
data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2
data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4
data8 0xb6982f048c999a56, 0xe60dfb2005c192e9
data8 0x9110021e7b516f0a, 0xb6c47044075b4142
data8 0xe645bd1544c7ea51, 0x912a708a39be9075
data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0
data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2
data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5
data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4
data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7
data8 0xe70a9136a7403039, 0x91afbc299ed0295d
data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589
data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02
data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92
data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a
data8 0x9212b5fcac537c19, 0xb80a6226904045e2
data8 0xe7e067453317ed2b, 0x9236f6b256923fcf
data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5
data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8
data8 0xe8454236bfaeca14, 0x9276bef031e6eb79
data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e
data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d
data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3
data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7
data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a
data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f
data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3
data8 0x931379a403be5c16, 0xb94de2d841a184c2
data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34
data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e
data8 0x9354c71412c69486, 0xb9a0297f172665e3
data8 0xe9dfa70b745ac1b4, 0x937a4c273907e262
data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38
data8 0x93968919f6e7975d, 0xb9f3030951267208
data8 0xea480963fd394197, 0x93bc516fdd4680c9
data8 0xba229d6a618e7c59, 0xea84034425f27484
data8 0x93d8c123d9be59b2, 0xba467144459f9855
data8 0xeab12713138dd1cc, 0x93f546c955e60076
data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b
data8 0x941b70a65879079f, 0xba9a76056b67ee7a
data8 0xeb1b0268343b121b, 0x943829f337410591
data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14
data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b
data8 0xeb765721e85f03d0, 0x947b86b57f5842ed
data8 0xbb1385a23be24e57, 0xebb389645f222f62
data8 0x94988aeb23470f86, 0xbb3814975e17c680
data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a
data8 0xbb5cc031009bf467, 0xec0fcc9321024509
data8 0x94d2d7a9170d8b42, 0xbb81889680024764
data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019
data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7
data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463
data8 0xecaad5278824e453, 0x9534cefa625fcb3a
data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77
data8 0x955265405c491a25, 0xbc223d88cfc88eee
data8 0xed089ed5dcd99446, 0x9570130c1f9bb857
data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c
data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a
data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c
data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6
data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d
data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684
data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903
data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306
data8 0xee357ead791fc670, 0x962e350575b409c5
data8 0xbd372f8598620f19, 0xee658cb3c134a463
data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e
data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d
data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f
data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d
data8 0xeef6a0da64a014ac, 0x96a8426705198795
data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811
data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15
data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d
data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6
data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371
data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0
data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607
data8 0x97430782be323831, 0xbe93f5b41d047cf7
data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf
data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d
data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c
data8 0xf0805c944d827454, 0x97a117ffd0f48e46
data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb
data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c
data8 0xf0e46442e76f6569, 0x97e0505a8637a036
data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896
data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4
data8 0xf1383fa9e9b5b381, 0x9815503365914a9d
data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b
data8 0x98354085054fd204, 0xbfc52428bec6e72f
data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902
data8 0xbfed838fddab024b, 0xf1d0593311db1757
data8 0x987571fffb7f94f6, 0xc016050c0420981a
data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23
data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f
data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce
data8 0xf258d095e465cc35, 0x98cbb2d196bd713d
data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34
data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4
data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344
data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e
data8 0x9922b8218160967a, 0xc0f054ca33eb3437
data8 0xf31670135ab9cc0f, 0x99438d686f75779d
data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb
data8 0x99647eea131fa20b, 0xc1433453de2033ff
data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0
data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6
data8 0x999ba5f14f8add02, 0xc188b130431d80e6
data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae
data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a
data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734
data8 0xf42e718b90c8bc16, 0x99f48a669c74c09e
data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00
data8 0x9a16154eb445c873, 0xc222f35a87b415ba
data8 0xf498c1076015faf8, 0x9a2c822ec198d667
data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5
data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01
data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e
data8 0xc2945aac24daaf6e, 0xf527a232cf6be334
data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66
data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958
data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4
data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff
data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d
data8 0xc323938449a2587e, 0xf5dc1501f324a812
data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20
data8 0xf6006bee86b5589e, 0x9b1b19033be35730
data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4
data8 0x9b3da7daf04c2892, 0xc397593adf2ba366
data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b
data8 0xc3b475b6206155d5, 0xf6929fb98225deb1
data8 0x9b77854e6c661200, 0xc3e0410243b97383
data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f
data8 0xc3fd890709833d37, 0xf6eeb177472cedae
data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06
data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4
data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1
data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1
data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503
data8 0xc490f9a94695ba14, 0xf7a874b97927af44
data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390
data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02
data8 0xc4db5941007aa853, 0xf806291bacb7f7a9
data8 0x9c568656c0423def, 0xc4f938aec206291a
data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60
data8 0xc52629e899dfd622, 0xf8646bf0defb759e
data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965
data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c
data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f
data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c
data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902
data8 0xc5adf561b91e110a, 0xf90f832c2700c160
data8 0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa
data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96
data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873
data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862
data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768
data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41
data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35
data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c
data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5
data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e
data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb
data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4
data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b
data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f
data8 0xc70fc0117c641630, 0xfacd431644ce0e40
data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be
data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075
data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5
data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c
data8 0xfb576c5762024805, 0x9e6ed27594550d2e
data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040
data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d
data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055
data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893
data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f
data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154
data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f
data8 0x9ef976db07288d04, 0xc84b978847a06b87
data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25
data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08
data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4
data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca
data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e
data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232
data8 0xfd118595143ee273, 0x9f860593d42fd7f3
data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a
data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663
data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037
data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb
data8 0x9fd383731ca51db9, 0xc95e5112e721582a
data8 0xfdb5544205095a53, 0x9fed79a04fbf9423
data8 0xc97f06bb49787677, 0xfdde8a67d2613531
data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
data8 0xa07d73ba65e680af, 0xca346d07b045a876
data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
data8 0xa0b24fe89e02602f, 0xca77068257be9bab
data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
data8 0xa0e77200215909e6, 0xcab9f8122c99a101
data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
ASM_SIZE_DIRECTIVE(T_table)
// float cbrtf(float): the argument is read from f8 and the result is left
// in f8.
//
// Flow, as implemented below:
//   1. Inputs that fail the fclass test, or whose significand is zero
//      (p12 set), are returned as f8*1 rounded with .s.
//   2. y = frcpa(a), r = 1 - a*y.
//   3. P_1 = C_1 + C_2*r, P_2 = -r*P_1, with C_1 and C_2 loaded from
//      poly_coeffs.
//   4. All three table values for the significand index are loaded
//      (f10, f11, f8); the exponent remainder 0/1/2 picks which one is T.
//   5. result = (T + T*P_2) * sign * 2^floor(exponent/3).
//
// Integer scratch: r2, r23-r26, r28-r30.  FP scratch: f6, f7, f9-f11, f14.
// Predicates: p6 (set by frcpa); p7, p8 and p12 are each used twice with
// different meanings (see the comments at their second definition).
.align 32
.global cbrtf#
.section .text
.proc cbrtf#
.align 32
cbrtf:
{ .mfi
// r28 = significand of the input (tested against zero below)
getf.sig r28=f8
// will continue only for normal/denormal numbers
// (p12 = not in class mask 0x1b, p7 = in class mask 0x1b)
(p0) fclass.nm.unc p12,p7 = f8, 0x1b
// r2 = address of the linkage-table (@ltoff) slot that holds the pointer
// to C_1,C_2 followed by T_table; dereferenced by the ld8 below
addl r2 = @ltoff(poly_coeffs), gp
}
{.mfi
// r29=bias-((2^8-1)/3) -63=0xffff-0x55-0x3f=0xff6b
// (added to floor(expon/3) further down to form the setf.exp operand)
mov r29=0xff6b
// normalize a
fma.s1 f14=f8,f1,f0
nop.i 0;;
}
{.mib
nop.m 0
// inputs that passed the class test but have an all-zero significand
// also take the early-return path
(p7) cmp.eq p12,p0=r28,r0
nop.b 0;;
}
{.mfb
// load start address for C_1,C_2 followed by T_table
ld8 r2=[r2]
// special operands: return a*1 (computed with .s.s0)
(p12) fma.s.s0 f8=f8,f1,f0
(p12) br.ret.spnt b0;;
}
{.mmf
// load C_1
ldfe f7=[r2],16
nop.m 0
// y=frcpa(a)
frcpa.s0 f8,p6=f1,f8;;
}
{.mmi
// load C_2
// (after the post-increment r2 points at the start of T_table)
ldfe f9=[r2],16
// r28=bias-(2^8-1)
// note: 2^8-1 = 3*0x55, so subtracting this value shifts the quotient of
// the division by 3 by exactly 0x55 and leaves the remainder unchanged
mov r28=0xff00
nop.i 0;;
}
{.mmi
// get normalized significand
getf.sig r23=f14
// get exponent
getf.exp r24=f14
// r25 = mask used below to split the getf.exp result into sign (r26)
// and exponent (r25)
mov r25=0x20000;;
}
{.mii
// get r26=sign
and r26=r24,r25
// eliminate leading 1 from r23=1st table index
shl r23=r23,1
// eliminate sign from exponent (r25)
andcm r25=r24,r25;;
}
{.mfi
// subtract bias from r25=exponent
sub r25=r25,r28
// r=1-a*y
(p6) fnma.s1 f6=f8,f14,f1
// r23=1st table index (y_index, 8 bits)
shr.u r23=r23,56;;
}
{.mii
// The next steps compute floor(expon/3) as (expon*0x5556)>>16, building
// the multiplier with shifts and adds only.
// 1: exponent*=5; // (2^{16}-1)/3=0x5555
shladd r24=r25,2,r25
// r23=3*y_index
shladd r23=r23,1,r23;;
// r30=(5*expon)*16+5*expon=(0x55)*expon
shladd r30=r24,4,r24;;
}
{.mmi
// adjust T_table pointer by 1st index
// (r2 += 8*(3*y_index): 24 bytes per significand index)
shladd r2=r23,3,r2;;
// f10=T[0][y]
(p6) ldf8 f10=[r2],8
// r24=(0x5500)*expon
shl r24=r30,8;;
}
{.mfi
// f11=T[1][y]
(p6) ldf8 f11=[r2],8
// P_1=C_1+C_2*r
(p6) fma.s1 f7=f9,f6,f7
// r24=(0x5555)*expon
add r24=r24,r30;;
}
{.mmi
// r24=(0x5556)*expon // 0x5556=(2^{16}+2)/3
add r24=r24,r25;;
// f8=T[2][y]
// (overwrites the frcpa result, which was last read when computing r)
(p6) ldf8 f8=[r2]
// r24=floor(expon/3)
shr r24=r24,16;;
}
{.mmi
nop.m 0
// r28=3*floor(expon/3)
shladd r28=r24,1,r24
// bias exponent
add r24=r29,r24;;
}
{.mmi
// get remainder of exponent/3
sub r25=r25,r28
// add sign to exponent
or r24=r24,r26
nop.i 0;;
}
{.mfi
nop.m 0
// P_2=-r*P_1
(p6) fnma.s1 f6=f7,f6,f0
// remainder=0 ?
// (p7 is redefined here: p7 = remainder is 0, p8 = remainder is not 0)
(p6) cmp.eq.unc p7,p8=r0,r25;;
}
{.mfi
// f14=sign*2^{exponent/3}
// (f14 no longer holds the normalized input after this point)
(p6) setf.exp f14=r24
nop.f 0
// remainder = 1 ?
// (p8 is narrowed to remainder==1; p12 is redefined as "remainder is
// neither 0 nor 1", i.e. 2; both are cleared when the incoming p8 is 0)
(p8) cmp.eq.unc p8,p12=1,r25;;
}
.pred.rel "mutex",p7,p8
{.mfi
nop.m 0
// remainder=0 -> use T=f10
// f8 = T + T*P_2
(p7) fma.s1 f8=f10,f6,f10
nop.i 0
}
{.mfi
nop.m 0
// remainder =1 -> use f11
// f8 = T + T*P_2
(p8) fma.s1 f8=f11,f6,f11
nop.i 0;;
}
{.mfi
nop.m 0
// remainder=2 -> T is the T[2][y] value already loaded into f8
// result=T+T*P_2
// NOTE(review): unlike the p7/p8 paths (.s1) this one already rounds with
// .s.s0 before the final scaling -- confirm the asymmetry is intended.
(p12) fma.s.s0 f8=f8,f6,f8
nop.i 0;;
}
{.mfb
nop.m 0
// T*=sgn*2^{expon/3}
// (final scaling and sign; result is returned in f8)
(p6) fma.s.s0 f8=f8,f14,f0
br.ret.sptk b0;;
}
.endp cbrtf
ASM_SIZE_DIRECTIVE(cbrtf)

889
sysdeps/ia64/fpu/s_cbrtl.S Normal file
View File

@ -0,0 +1,889 @@
.file "cbrtl.asm"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
// Bob Norin, Shane Story, and Ping Tak Peter Tang
// of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 4/28/00: Initial version
//
// API
//==============================================================
// long double cbrtl(long double)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// cbrt(a) = cbrt(a y) / cbrt(y)
// = cbrt(1 - (1 - a y)) * 1/cbrt(y)
//
// where y = frcpa(a).
//
// * cbrt(1 - (1 - a y)) is approximated by a degree-6 polynomial
//
// 1 - c_1 r - c_2 * r^2 - c_3 * r^3 - c_4 * r^4 - c_5 * r^5 - c_6 * r^6
//
// in r = 1 - a y.
//
// * The values 1/cbrt(y) are stored as two tables of constants T_hi
// (double-extended precision) and D (single precision) as follows:
//
// T_hi (1 + D) = 1/cbrt(y) to about 80 bits of accuracy
//
// The tables are only stored for three exponent values and are
// then multiplied by 2^(e/3) (see the formula for T below), where e is
// the exponent of the input number.
// This computation is carried out in parallel with the polynomial
// evaluation:
//
// T = 2^(e/3) * T_hi
//===============
// input = x
// C = frcpa(x)
// r = C * x - 1
//
// Special values
//==============================================================
// Registers used
//==============================================================
// f6-f15
// r2-r3, r23-r30
// p6,p7,p12
#include "libm_support.h"
// Data tables
//==============================================================
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
// poly_coeffs: coefficients C_1..C_6 of the polynomial in r used by cbrtl.
// C_1 and C_2 each occupy 16 bytes (64-bit significand word followed by
// the sign/exponent word) and are read with ldfe.  C_3..C_6 are stored as
// two pairs of 8-byte values and are read with ldfpd.
// The block is 64 bytes long and T_table must follow it directly: cbrtl
// reaches T_table by post-incrementing the pointer it uses to read these
// coefficients.
poly_coeffs:
ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
data8 0xaaaaaaaaaaaaaab1, 0x00003ffd // C_1
data8 0xe38e38e38e38e3e0, 0x00003ffb // C_2
data8 0x3faf9add3c0be9a6, 0x3fa511e8d2b1f749 // C_3, C_4
data8 0x3f9ee71b2c6ebe99, 0x3f9809180fd0340c // C_5, C_6
ASM_SIZE_DIRECTIVE(poly_coeffs)
// T_table: 768 entries of 8 bytes each, laid out as three consecutive
// groups of 256 entries.  cbrtl selects an entry with
//   index = ((expon mod 3) << 8) + (top 8 fraction bits of the input)
// and reads it with ldf8 as a 64-bit significand (the T_hi value of the
// file header).  The table must stay immediately after poly_coeffs,
// because cbrtl arrives here by stepping its coefficient pointer forward.
T_table:
ASM_TYPE_DIRECTIVE(T_table,@object)
data8 0x80155c748c374836, 0x8040404b0879f7f9
data8 0x806b5dce4b405c10, 0x8096b586974669b1
data8 0x80bcd273d952a028, 0x80e898c52813f2f3
data8 0x81149add67c2d208, 0x813b4e2c856b6e9a
data8 0x8167c1dde03de7aa, 0x818ed973b811135e
data8 0x81bbc0c33e13ec98, 0x81e33e69fbe7504a
data8 0x820aec524e3c23e9, 0x823880f78e70b805
data8 0x826097a62a8e5200, 0x8288dfe00e9b5eaf
data8 0x82b15a10c5371624, 0x82da06a527b18937
data8 0x8302e60b635ab394, 0x832bf8b2feec2f0e
data8 0x83553f0ce00e276b, 0x837eb98b50f8322a
data8 0x83a270f44c84f699, 0x83cc4d7cfcfac5ca
data8 0x83f65f78a8872b4c, 0x8420a75f2f7b53c8
data8 0x844510461ff14209, 0x846fbd91b930bed2
data8 0x84947e18234f3294, 0x84bf92755825045a
data8 0x84e4ac0ee112ba51, 0x8509ef44b86f20be
data8 0x85359d5d91768427, 0x855b3bd5b7384357
data8 0x858104f0c415f79a, 0x85a6f90390d29864
data8 0x85d3772fcd56a1dd, 0x85f9c982fcc002f3
data8 0x862047e0e7ea554b, 0x8646f2a26f7f5852
data8 0x866dca21754096b5, 0x8694ceb8dfd17a37
data8 0x86bc00c49e9307e8, 0x86dccd74fce79610
data8 0x870453c845acf90f, 0x872c089a1e90342c
data8 0x8753ec4a92d16c5e, 0x877bff3aca19f6b4
data8 0x879d88b6fe1c324c, 0x87c5f346dbf98c3a
data8 0x87e7c653efacef2c, 0x881089d4e73ffefc
data8 0x88397e6a366f2a8a, 0x885bc559e5e1c081
data8 0x887e2ee392bb7a93, 0x88a7a8587e404257
data8 0x88ca5eda67594784, 0x88f4356166bd590e
data8 0x89173a0acf5ce026, 0x893a62a098b6a57b
data8 0x895daf637236ae2c, 0x89883b9d1c2fa9c5
data8 0x89abd8dd374a5d7b, 0x89cf9b1dcd197fa0
data8 0x89f382a258ea79de, 0x8a178faf06648f29
data8 0x8a3bc288b3e1d18a, 0x8a601b74f4d1f835
data8 0x8a849aba14274764, 0x8aa9409f16cdbc9b
data8 0x8ace0d6bbe2cb316, 0x8af301688ab33558
data8 0x8b181cdebe6f3206, 0x8b3d60185fafcb7c
data8 0x8b62cb603bb2fad0, 0x8b80d7d6bc4104de
data8 0x8ba68bf73ac74f39, 0x8bcc68fb9f9f7335
data8 0x8bf26f31c534fca2, 0x8c10f86e13a1a1f9
data8 0x8c3749916cc6abb5, 0x8c5dc4c4f7706032
data8 0x8c7cac3a8c42e3e0, 0x8ca373f1b7bf2716
data8 0x8cc29907fb951294, 0x8ce9ae4e9492aac8
data8 0x8d0911dddbfdad0e, 0x8d3075c4f20f04ee
data8 0x8d5018a9d4de77d5, 0x8d77cc47dd143515
data8 0x8d97af6352739cb7, 0x8db7af523167800f
data8 0x8ddfd80bc68c32ff, 0x8e00197e1e7c88fe
data8 0x8e207859f77e20e7, 0x8e40f4ce60c9f8e2
data8 0x8e69ba46cf2fde4d, 0x8e8a7a00bd7ae63e
data8 0x8eab57ef1cf2f529, 0x8ecc5442cffb1dad
data8 0x8eed6f2d2a4acbfe, 0x8f0ea8dff24441ff
data8 0x8f385c95d696b817, 0x8f59dc43edd930f3
data8 0x8f7b7b5f5ffad1c4, 0x8f9d3a1bea165f38
data8 0x8fbf18adc34b66da, 0x8fe117499e356095
data8 0x90033624aa685f8d, 0x9025757495f36b86
data8 0x903f3a5dcc091203, 0x9061b2fceb2bdbab
data8 0x90844ca7211032a7, 0x90a7079403e6a15d
data8 0x90c9e3fbafd63799, 0x90ece216c8a16ee4
data8 0x9110021e7b516f0a, 0x912a708a39be9075
data8 0x914dcc7b31146370, 0x91714af8cfe984d5
data8 0x918c00a6f3795e97, 0x91afbc299ed0295d
data8 0x91d39add3e958db0, 0x91ee9920a8974d92
data8 0x9212b5fcac537c19, 0x9236f6b256923fcf
data8 0x92523ee6f90dcfc3, 0x9276bef031e6eb79
data8 0x929236ec237a24ad, 0x92b6f70b7efe9dc3
data8 0x92d29f61eec7dc2b, 0x92f7a05d5b8ba92f
data8 0x931379a403be5c16, 0x9338bc44de2e3f34
data8 0x9354c71412c69486, 0x937a4c273907e262
data8 0x93968919f6e7975d, 0x93bc516fdd4680c9
data8 0x93d8c123d9be59b2, 0x93f546c955e60076
data8 0x941b70a65879079f, 0x943829f337410591
data8 0x9454f995765bc4d2, 0x947b86b57f5842ed
data8 0x94988aeb23470f86, 0x94b5a5dc9695f42a
data8 0x94d2d7a9170d8b42, 0x94f9e87dd78bf019
data8 0x95175019a503d89e, 0x9534cefa625fcb3a
data8 0x955265405c491a25, 0x9570130c1f9bb857
data8 0x9597ca4119525184, 0x95b5af6fb5aa4d3c
data8 0x95d3ac9273aafd7a, 0x95f1c1cafdfd3684
data8 0x960fef3b430b8d5f, 0x962e350575b409c5
data8 0x964c934c0dfc1708, 0x966b0a31c9c6bc7d
data8 0x968999d9ad8d264e, 0x96a8426705198795
data8 0x96c703fd64445ee5, 0x96e5dec0a7b4268d
data8 0x9704d2d4f59f79f3, 0x9723e05ebe91b9b0
data8 0x97430782be323831, 0x97624865fc0df8bf
data8 0x9781a32dcc640b2a, 0x97a117ffd0f48e46
data8 0x97c0a701f9d263c9, 0x97e0505a8637a036
data8 0x97f57a9fb0b08c6e, 0x9815503365914a9d
data8 0x98354085054fd204, 0x98554bbbf8a77902
data8 0x987571fffb7f94f6, 0x9895b3791dd03c23
data8 0x98ab43a5fc65d0c8, 0x98cbb2d196bd713d
data8 0x98ec3d9ec7b6f21a, 0x990ce436db5e8344
data8 0x9922b8218160967a, 0x99438d686f75779d
data8 0x99647eea131fa20b, 0x997a85045a47c6d0
data8 0x999ba5f14f8add02, 0x99bce38b5465ecae
data8 0x99d31ca0887f30f9, 0x99f48a669c74c09e
data8 0x9a16154eb445c873, 0x9a2c822ec198d667
data8 0x9a4e3e080cd91b78, 0x9a70177afe52322e
data8 0x9a86b8fa94eebe10, 0x9aa8c42866ae2958
data8 0x9abf86f9e12fc45e, 0x9ae1c462fc05f49d
data8 0x9af8a8dc936b84d0, 0x9b1b19033be35730
data8 0x9b3da7daf04c2892, 0x9b54c2e4c8a9012b
data8 0x9b77854e6c661200, 0x9b8ec2e678d56d2f
data8 0x9ba60e6a5ca133b6, 0x9bc919ea66a151a4
data8 0x9be0887c09ef82bb, 0x9c03c8d5fffc3503
data8 0x9c1b5ad21a81cbb9, 0x9c3ed09216e9ca02
data8 0x9c568656c0423def, 0x9c7a320af242ce60
data8 0x9c920bf7a8c01dc2, 0x9ca9f475d98b159c
data8 0x9ccdeca60e80b5f8, 0x9ce5f9d4653d4902
data8 0x9cfe15cb38bfdd8e, 0x9d225b983f6c1f96
data8 0x9d3a9cca32261ed7, 0x9d52ecfccebe1768
data8 0x9d77818d95b82f86, 0x9d8ff7893fa4706c
data8 0x9da87cbef36f2a5e, 0x9dcd6140b4a35aeb
data8 0x9de60cd06dc6e2d4, 0x9dfec7d4cc43b76f
data8 0x9e17925ec9fccc4a, 0x9e3cdf6db57dc075
data8 0x9e55d110b63637a8, 0x9e6ed27594550d2e
data8 0x9e87e3adc385d393, 0x9ead9b54b37a1055
data8 0x9ec6d46a3d7de215, 0x9ee01d9108be3154
data8 0x9ef976db07288d04, 0x9f12e05a4759ec25
data8 0x9f2c5a20f4da6668, 0x9f52af78ed1733ca
data8 0x9f6c52426a39d003, 0x9f860593d42fd7f3
data8 0x9f9fc97fdb96bd51, 0x9fb99e194f4a7037
data8 0x9fd383731ca51db9, 0x9fed79a04fbf9423
data8 0xa00780b413b24ee8, 0xa02eab2c4474b0cd
data8 0xa048dcd51ccfd142, 0xa0631fa894b11b8d
data8 0xa07d73ba65e680af, 0xa097d91e6aaf71b0
data8 0xa0b24fe89e02602f, 0xa0ccd82d1bd2f68b
data8 0xa0e77200215909e6, 0xa1021d760d584855
data8 0xa11cdaa36068a57d, 0xa137a99cbd3f880b
data8 0xa160019ed37fb4ae, 0xa1960b5966da4608
data8 0xa1cc5dbe6dc2aab4, 0xa202f97995b69c0d
data8 0xa232fe6eb0c0577d, 0xa26a2582012f6e17
data8 0xa2a197e5d10465cb, 0xa2d25a532efefbc8
data8 0xa30a5bd6e49e4ab8, 0xa33b9c9b59879e24
data8 0xa3742fca6a3c1f21, 0xa3a5f1273887bf22
data8 0xa3d7ef508ff11574, 0xa4115ce30548bc15
data8 0xa443df0e53df577a, 0xa4769fa5913c0ec3
data8 0xa4a99f303bc7def5, 0xa4dcde37779adf4b
data8 0xa5105d46152c938a, 0xa5441ce89825cb8d
data8 0xa5781dad3e54d899, 0xa5ac602406c4e68c
data8 0xa5d9601d95c2c0bc, 0xa60e1e1a2de14745
data8 0xa6431f6e3fbd9658, 0xa67864b0d432fda4
data8 0xa6a6444aa0243c0b, 0xa6dc094d10f25792
data8 0xa70a574cc02bba69, 0xa7409e2af9549084
data8 0xa76f5c64ca2cf13b, 0xa79e4f0babab5dc0
data8 0xa7d5579ae5164b85, 0xa804bd3c6fe61cc8
data8 0xa8345895e5250a5a, 0xa8642a122b44ef0b
data8 0xa89c38ca18f6108b, 0xa8cc81063b6e87ca
data8 0xa8fd00bfa409285e, 0xa92db8664d5516da
data8 0xa95ea86b75cc2c20, 0xa98fd141a4992deb
data8 0xa9c1335cae7446ba, 0xa9ea8686f556f645
data8 0xaa1c52d17906bb19, 0xaa4e59b046dab887
data8 0xaa809b9c60d1890b, 0xaab319102f3f9b33
data8 0xaadd5a18c1e21274, 0xab1045f2ac31bdf5
data8 0xab3ae3ab2df7231e, 0xab6e3f945d1e96fc
data8 0xaba1d953a08fa94e, 0xabcd090db7ef4c3f
data8 0xabf864602d7c323d, 0xac2ca5886ccf9b57
data8 0xac5861d4aa441f0f, 0xac8d183fe3a2fbed
data8 0xacb93703ff51571e, 0xace5830ad0c3f14b
data8 0xad11fca5d78b3ff2, 0xad4797fddf91a798
data8 0xad747701e559ebcb, 0xada184a47e9c7613
data8 0xadcec13ab0dda8ff, 0xadfc2d1a5fd21ba8
data8 0xae29c89a5053c33a, 0xae5794122b638df9
data8 0xae858fda8137ae0a, 0xaeb3bc4ccc56d3d1
data8 0xaee219c374c09920, 0xaf10a899d3235fe7
data8 0xaf3f692c341fe8b4, 0xaf6e5bd7db9ae6c2
data8 0xaf9d80fb081cd91b, 0xafc35ce063eb3787
data8 0xaff2ddcb5f28f03d, 0xb022923b148e05c5
data8 0xb0527a919adbf58b, 0xb078f3ab1d701c65
data8 0xb0a93a6870649f31, 0xb0d9b624d62ec856
data8 0xb100a5f53fb3c8e1, 0xb131821882f5540a
data8 0xb158bf8e4cb04055, 0xb189fd69d56b238f
data8 0xb1b189958e8108e4, 0xb1e32a8165b09832
data8 0xb20b0678fc271eec, 0xb23d0bd3f7592b6e
data8 0xb26538b2db8420dc, 0xb28d89e339ceca14
data8 0xb2c022ca12e55a16, 0xb2e8c6852c6b03f1
data8 0xb3118f4eda9fe40f, 0xb33a7d6268109ebe
data8 0xb36ddbc5ea70ec55, 0xb3971e9b39264023
data8 0xb3c0877ecc18e24a, 0xb3ea16ae3a6c905f
data8 0xb413cc67aa0e4d2d, 0xb43da8e9d163e1af
data8 0xb47233773b84d425, 0xb49c6825430fe730
data8 0xb4c6c46bcdb27dcf, 0xb4f1488c0b35d26f
data8 0xb51bf4c7c51f0168, 0xb546c9616087ab9c
data8 0xb571c69bdffd9a70, 0xb59cecbae56984c3
data8 0xb5bd64512bb14bb7, 0xb5e8d2a4bf5ba416
data8 0xb6146a9a1bc47819, 0xb6402c7749d621c0
data8 0xb66c1882fb435ea2, 0xb6982f048c999a56
data8 0xb6c47044075b4142, 0xb6e5bd6bfd02bafd
data8 0xb7124a2736ff8ef2, 0xb73f026a01e94177
data8 0xb760a959f1d0a7a7, 0xb78dae7e06868ab0
data8 0xb7badff8ad9e4e02, 0xb7dce25b8e17ae9f
data8 0xb80a6226904045e2, 0xb8380f1cafd73c1c
data8 0xb85a6ea8e321b4d8, 0xb8886b684ae7d2fa
data8 0xb8ab0726fa00cf5d, 0xb8d954a4d13b7cb1
data8 0xb8fc2d4f6cd9f04a, 0xb92acc851476b1ab
data8 0xb94de2d841a184c2, 0xb97cd4c36c92693c
data8 0xb9a0297f172665e3, 0xb9cf6f21e36c3924
data8 0xb9f3030951267208, 0xba229d6a618e7c59
data8 0xba467144459f9855, 0xba6a60c3c48f1a4b
data8 0xba9a76056b67ee7a, 0xbabea699563ada6e
data8 0xbae2f350b262cc4b, 0xbb1385a23be24e57
data8 0xbb3814975e17c680, 0xbb5cc031009bf467
data8 0xbb81889680024764, 0xbbb2c0d8703ae95d
data8 0xbbd7cd09ba3c5463, 0xbbfcf68c4977718f
data8 0xbc223d88cfc88eee, 0xbc47a2284fee4ff8
data8 0xbc79ac0916ed7b8a, 0xbc9f5670d1a13030
data8 0xbcc51f068cb95c1d, 0xbceb05f4b30a9bc0
data8 0xbd110b6604c7d306, 0xbd372f8598620f19
data8 0xbd5d727edb6b3c7e, 0xbd83d47d937bbc6d
data8 0xbdaa55addf1ae47d, 0xbdd0f63c36aa73f0
data8 0xbdf7b6556d550a15, 0xbe1e9626b1ffa96b
data8 0xbe4595dd903e5371, 0xbe6cb5a7f14bc935
data8 0xbe93f5b41d047cf7, 0xbebb5630bae4c15f
data8 0xbee2d74cd30a430c, 0xbf0a7937cf38d981
data8 0xbf323c217be2bc8c, 0xbf5a203a09342bbb
data8 0xbf74cad1c14ebfc4, 0xbf9ce6a497a89f78
data8 0xbfc52428bec6e72f, 0xbfed838fddab024b
data8 0xc016050c0420981a, 0xc03ea8cfabddc330
data8 0xc059d3cbd65ddbce, 0xc082b122a3c78c9d
data8 0xc0abb1499ae736c4, 0xc0d4d474c3aedaaf
data8 0xc0f054ca33eb3437, 0xc119b2c67e600ed0
data8 0xc1433453de2033ff, 0xc15ef3e44e10032d
data8 0xc188b130431d80e6, 0xc1b2929d6067730e
data8 0xc1ce9268f31cc734, 0xc1f8b0877c1b0c08
data8 0xc222f35a87b415ba, 0xc23f3467349e5c88
data8 0xc269b4e40e088c01, 0xc2945aac24daaf6e
data8 0xc2b0de05e43c1d66, 0xc2dbc275e1229d09
data8 0xc2f86fca9d80eeff, 0xc323938449a2587e
data8 0xc3406b40a538ed20, 0xc36bcee8211d15e0
data8 0xc397593adf2ba366, 0xc3b475b6206155d5
data8 0xc3e0410243b97383, 0xc3fd890709833d37
data8 0xc41ae295f7e7fa06, 0xc44709f7bb8a4dd2
data8 0xc4648fb0e0bec4c1, 0xc490f9a94695ba14
data8 0xc4aeac0173b7d390, 0xc4db5941007aa853
data8 0xc4f938aec206291a, 0xc52629e899dfd622
data8 0xc54436e44043b965, 0xc562563abf9ea07f
data8 0xc58fa7d1dc42921c, 0xc5adf561b91e110a
data8 0xc5cc5591bdbd82fa, 0xc5fa08f1ff20593c
data8 0xc618980a79ce6862, 0xc6373a09e34b50fa
data8 0xc66550a6e0baaf35, 0xc6842241926342c9
data8 0xc6a3070b7c93bb9e, 0xc6d18260bb84081b
data8 0xc6f0977c9416828b, 0xc70fc0117c641630
data8 0xc72efc34d7e615be, 0xc75dfb441594141e
data8 0xc77d68aa019bda4c, 0xc79ce9ea478dbc4f
data8 0xc7bc7f1ae453219d, 0xc7ec0476e15e141a
data8 0xc80bcbe16f1d540f, 0xc82ba78a5d349735
data8 0xc84b978847a06b87, 0xc86b9bf1ee817bc6
data8 0xc88bb4de3667cdf4, 0xc8bc00e7fe9e23a3
data8 0xc8dc4d7ff2d25232, 0xc8fcaeebcb40eb47
data8 0xc91d25431426a663, 0xc93db09d7fdb2949
data8 0xc95e5112e721582a, 0xc97f06bb49787677
data8 0xc99fd1aecd6e1b06, 0xc9d12a3e27bb1625
data8 0xc9f22ad82ba3d5f0, 0xca134113105e67b2
data8 0xca346d07b045a876, 0xca55aecf0e94bb88
data8 0xca77068257be9bab, 0xca98743ae1c693a8
data8 0xcab9f8122c99a101, 0xcadb9221e268c3b5
data8 0xcafd4283d8043dfd, 0xcb1f09520d37c6fb
data8 0xcb51ddcb9e93095e, 0xcb95f333968ad59b
data8 0xcbda64292d3ffd97, 0xcc1f3184af961596
data8 0xcc5bb1ac954d33e2, 0xcca12e9831fc6402
data8 0xcce70a67b64f24ad, 0xcd24794726477ea5
data8 0xcd6b096a0b70ee87, 0xcda9177738b15a90
data8 0xcdf05f2247dffab9, 0xce2f0f347f96f906
data8 0xce6e0be0cd551a61, 0xceb666b2c347d1de
data8 0xcef609b0cb874f00, 0xcf35fb5447e5c765
data8 0xcf763c47ee869f00, 0xcfb6cd3888d71785
data8 0xcff7aed4fbfbb447, 0xd038e1ce5167e3c6
data8 0xd07a66d7bfa0ebba, 0xd0bc3ea6b32d1b21
data8 0xd0f4f0e8f36c1bf8, 0xd1376458e34b037e
data8 0xd17a2ca133f78572, 0xd1bd4a80301c5715
data8 0xd1f71682b2fa4575, 0xd23ad555f773f059
data8 0xd2752c7039a5bf73, 0xd2b98ee008c06b59
data8 0xd2f4735ffd700280, 0xd32f99ed6d9ac0e1
data8 0xd374f0666c75d51c, 0xd3b0a7d13618e4a1
data8 0xd3eca2ea53bcec0c, 0xd428e23874f13a17
data8 0xd46f82fe293bc6d3, 0xd4ac57e9b7186420
data8 0xd4e972becb04e8b8, 0xd526d40a7a9b43a3
data8 0xd5647c5b73917370, 0xd5a26c4201bd6d13
data8 0xd5e0a45015350a7e, 0xd614b539c6194104
data8 0xd6537310e224283f, 0xd6927ab62244c917
data8 0xd6d1ccc1fc4ef4b7, 0xd71169cea98fdded
data8 0xd746a66a5bc9f6d9, 0xd786ce8f0fae5317
data8 0xd7bc7ff214c4e75a, 0xd7fd35467a517ed1
data8 0xd83e38838648d815, 0xd874a1db598b8951
data8 0xd8ab42205b80edaf, 0xd8ed1849d202f965
data8 0xd92432bd5a173685, 0xd9669ca45b03c23e
data8 0xd99e3327cf89574e, 0xd9d602b19b100466
data8 0xda0e0ba86c096841, 0xda5195fcdb1c3dce
data8 0xda8a1eb87a491f6c, 0xdac2e230b91c3f84
data8 0xdafbe0d0b66aea30, 0xdb351b04a8fafced
data8 0xdb6e9139e33cdd8e, 0xdba843ded7151ea1
data8 0xdbe2336319b61fc8, 0xdc1c60376789fa68
data8 0xdc56cacda82d0cd5, 0xdc917398f2797814
data8 0xdccc5b0d90a3e628, 0xdd0781a10469f0f2
data8 0xdd42e7ca0b52838f, 0xdd729ad01c69114d
data8 0xddae749c001fbf5e, 0xddea8f50a51c69b1
data8 0xde26eb69a0f0f111, 0xde576480262399bc
data8 0xde943789645933c8, 0xded14d58139a28af
data8 0xdf025c00bbf2b5c7, 0xdf3feb44d723a713
data8 0xdf715bc16c159be0, 0xdfaf66240e29cda8
data8 0xdfe139cbf6e19bdc, 0xe01fc0fe94d9fc52
data8 0xe051f92ffcc0bd60, 0xe090feec9c9a06ac
data8 0xe0c39d0c9ff862d6, 0xe0f668eeb99f188d
data8 0xe1362890eb663139, 0xe1695c7212aecbaa
data8 0xe19cbf0391bbbbe9, 0xe1d050901c531e85
data8 0xe2110903b4f4047a, 0xe2450559b4d80b6d
data8 0xe27931a231554ef3, 0xe2ad8e2ac3c5b04b
data8 0xe2e21b41b9694cce, 0xe316d93615862714
data8 0xe3590bd86a0d30f9, 0xe38e38e38e38e38e
data8 0xe3c397d1e6db7839, 0xe3f928f5953feb9e
data8 0xe42eeca17c62886c, 0xe464e32943446305
data8 0xe49b0ce15747a8a2, 0xe4d16a1eee94e9d4
data8 0xe4fa52107353f67d, 0xe5310a471f4d2dc3
data8 0xe567f6f1c2b9c224, 0xe59f18689a9e4c9a
data8 0xe5d66f04b8a68ecf, 0xe60dfb2005c192e9
data8 0xe645bd1544c7ea51, 0xe66fb21b505b20a0
data8 0xe6a7d32af4a7c59a, 0xe6e02b129c6a5ae4
data8 0xe70a9136a7403039, 0xe74349fb2d92a589
data8 0xe77c3a9c86ed7d42, 0xe7a713f88151518a
data8 0xe7e067453317ed2b, 0xe819f37a81871bb5
data8 0xe8454236bfaeca14, 0xe87f32f24c3fc90e
data8 0xe8aacd8688892ba6, 0xe8e523fd32f606f7
data8 0xe9110b5311407927, 0xe94bc8bf0c108fa3
data8 0xe977fdc439c2ca3c, 0xe9b3236528fc349e
data8 0xe9dfa70b745ac1b4, 0xea1b36268d0eaa38
data8 0xea480963fd394197, 0xea84034425f27484
data8 0xeab12713138dd1cc, 0xeade6db73a5e503b
data8 0xeb1b0268343b121b, 0xeb489b0b2bdb5f14
data8 0xeb765721e85f03d0, 0xebb389645f222f62
data8 0xebe198f090607e0c, 0xec0fcc9321024509
data8 0xec3e247da8b82f61, 0xec7c27d21321c9f7
data8 0xecaad5278824e453, 0xecd9a76d097d4e77
data8 0xed089ed5dcd99446, 0xed37bb95add09a1c
data8 0xed76c70508f904b6, 0xeda63bb05e7f93c6
data8 0xedd5d661daed2dc4, 0xee05974eef86b903
data8 0xee357ead791fc670, 0xee658cb3c134a463
data8 0xee95c1987f080211, 0xeec61d92d8c4314f
data8 0xeef6a0da64a014ac, 0xef274ba72a07c811
data8 0xef581e31a2c91260, 0xef8918b2bc43aec6
data8 0xefba3b63d89d7cbf, 0xefeb867ecffaa607
data8 0xf01cfa3df1b9c9fa, 0xf04e96dc05b43e2d
data8 0xf0805c944d827454, 0xf0b24ba285c495cb
data8 0xf0e46442e76f6569, 0xf116a6b2291d7896
data8 0xf1383fa9e9b5b381, 0xf16ac84f90083b9b
data8 0xf19d7b686dcb03d7, 0xf1d0593311db1757
data8 0xf20361ee8f1c711e, 0xf23695da7de51d3f
data8 0xf258d095e465cc35, 0xf28c4d0bfc982b34
data8 0xf2bff55eb3f0ea71, 0xf2f3c9cf9884636e
data8 0xf31670135ab9cc0f, 0xf34a8e9f0b54cdfb
data8 0xf37ed9fa6b8add3f, 0xf3a1cfe884ef6bb6
data8 0xf3d66689dcc8e8d3, 0xf40b2ab069d5c96a
data8 0xf42e718b90c8bc16, 0xf463822a0a3b4b00
data8 0xf498c1076015faf8, 0xf4bc5a19a33990b5
data8 0xf4f1e6a7d6f5425f, 0xf527a232cf6be334
data8 0xf54b8ecdcda90851, 0xf5819949c7ad87b4
data8 0xf5a5bac9213b48a9, 0xf5dc1501f324a812
data8 0xf6006bee86b5589e, 0xf63716b2fa067fa4
data8 0xf66df22fb6132b9c, 0xf6929fb98225deb1
data8 0xf6c9cd13021e3fea, 0xf6eeb177472cedae
data8 0xf713abf4cb0b3afb, 0xf74b4d5333684ef1
data8 0xf7707f75a72f8e94, 0xf7a874b97927af44
data8 0xf7cddf140aedf1d8, 0xf806291bacb7f7a9
data8 0xf82bcc43b92eafef, 0xf8646bf0defb759e
data8 0xf88a487dfc3ff5f7, 0xf8b03c2b46cdc17f
data8 0xf8e95541c152ae7a, 0xf90f832c2700c160
data8 0xf935c88e0c7f419b, 0xf96f5cd84fd86873
data8 0xf995dd53ebdd9d6d, 0xf9bc75a034436a41
data8 0xf9f686f26d5518de, 0xfa1d5b39b910a8c5
data8 0xfa4447acc4ecbfd2, 0xfa7ed7e51e6fdfb4
data8 0xfaa601394d49a1a0, 0xfacd431644ce0e40
data8 0xfaf49d96f7a75909, 0xfb2fd3c65e562fd5
data8 0xfb576c5762024805, 0xfb7f1debc22c4040
data8 0xfba6e89f32d0190a, 0xfbe2c803a0894893
data8 0xfc0ad1ff0ed9ecf0, 0xfc32f57bdfbcbe7f
data8 0xfc5b32968f99b21c, 0xfc83896bc861ab08
data8 0xfcabfa1861ed4815, 0xfce8d3cea7d3163e
data8 0xfd118595143ee273, 0xfd3a519943d4865a
data8 0xfd6337f8e1ae5a4b, 0xfd8c38d1c8e927eb
data8 0xfdb5544205095a53, 0xfdde8a67d2613531
data8 0xfe07db619e781611, 0xfe460768d80bf758
data8 0xfe6f9bfb06cd32f6, 0xfe994bcd3d14fcc2
data8 0xfec316fecaf3f2ab, 0xfeecfdaf33fadb80
data8 0xff16fffe2fa8fad6, 0xff411e0ba9db886d
data8 0xff6b57f7c33e4e9a, 0xff95ade2d1bd7358
data8 0xffc01fed60f86fb5, 0xffeaae3832b63956
ASM_SIZE_DIRECTIVE(T_table)
// D_table: 768 entries of 4 bytes each, laid out as three consecutive
// groups of 256 entries and indexed exactly like T_table:
//   index = ((expon mod 3) << 8) + (top 8 fraction bits of the input)
// cbrtl reads the selected entry with ldfs; it is the single-precision
// correction D of the file header, i.e. T_hi (1 + D) = 1/cbrt(y).
D_table:
ASM_TYPE_DIRECTIVE(D_table,@object)
data4 0x1e50f488, 0x1ebdc559, 0x1e649ec1, 0x9eed9b2c
data4 0x9e511c44, 0x9ec6d551, 0x9eefe248, 0x9e313854
data4 0x9f54ff18, 0x9d231411, 0x1ee5d63c, 0x9edf6b95
data4 0x9f332aaa, 0x1dc92a84, 0x1f73fb7b, 0x1e32f100
data4 0x9ea636f5, 0x9f6c3353, 0x9f405552, 0x1f33fd97
data4 0x1e975291, 0x9e59a11e, 0x1e47b0ba, 0x9d8ad33e
data4 0x1ea51bf6, 0x1f25d782, 0x9ecf534d, 0x1f55436f
data4 0x1d0975e4, 0x9f0633a1, 0x1f3e840a, 0x1f523a4c
data4 0x9f53cbbc, 0x9c8b5661, 0x9f6bc8eb, 0x1f4f6c7b
data4 0x9ed9b376, 0x9f5b30b6, 0x1f64fa5e, 0x1cbcc3e0
data4 0x1f343548, 0x1f62a6a2, 0x9f336abb, 0x9f1d15af
data4 0x1f476c83, 0x1ea86421, 0x1f33b2cf, 0x9e8f1348
data4 0x1f6fa829, 0x9f30ee3a, 0x9ebd6146, 0x1f2db598
data4 0x1ef9600d, 0x1f5b1427, 0x9edd741b, 0x1f51ef4e
data4 0x9f1aa57d, 0x9ee9b5e0, 0x9f17ecd7, 0x1ead71ff
data4 0x1f6c910e, 0x9e1837df, 0x9f0f17d9, 0x9e8350dd
data4 0x9d292f1b, 0x9e33b3ab, 0x9d6f0fe8, 0x9ed8c7cc
data4 0x9ec598c8, 0x9d56758c, 0x1e090c1e, 0x9ed4b941
data4 0x9f1fc4cf, 0x1f63513a, 0x9edd0abc, 0x1e3924dd
data4 0x1f60d56f, 0x1ea84424, 0x9e88f4fb, 0x1f205c09
data4 0x1ec9ae4e, 0x1d2d5738, 0x9f2c9f6d, 0x1e0765c2
data4 0x1e8bbdd7, 0x9f16d9f1, 0x9ea62627, 0x1f13904c
data4 0x1e566ab8, 0x9dca3d1a, 0x9e91f2a1, 0x9f14641c
data4 0x9f278946, 0x1f490c1e, 0x1f575eb6, 0x1f50b3fd
data4 0x9da32efb, 0x1ea95e59, 0x9e41e058, 0x9eada15f
data4 0x9e4fe66c, 0x1f3abc98, 0x1f1b8d1e, 0x9ece97e4
data4 0x1d188aed, 0x9e89b6ee, 0x1f287478, 0x9e8a161a
data4 0x1e4749f7, 0x9e68084a, 0x1e867f33, 0x9f462b63
data4 0x1db30792, 0x1f59a767, 0x9d1da4ae, 0x9f472a33
data4 0x1d1e91cd, 0x9f414824, 0x9f473d4f, 0x1f4b5783
data4 0x9f5b04b8, 0x9f5c205b, 0x1f309617, 0x9f0d6852
data4 0x9d96a609, 0x9f0965c2, 0x9e23f467, 0x9f089884
data4 0x9ec71458, 0x9ed6e955, 0x1e5e8691, 0x1f5b2bbc
data4 0x9f128268, 0x1ed40f5b, 0x1dc430ce, 0x1f345986
data4 0x1d778f72, 0x1e9b11d6, 0x9f5a40be, 0x9e07f61a
data4 0x9ed641a7, 0x9f334787, 0x1e952fd0, 0x1edeb5e2
data4 0x9e9f3eb1, 0x9e379fd9, 0x1f13102a, 0x9e5e80e1
data4 0x1c757944, 0x1dae2260, 0x1f183ab7, 0x1e55d576
data4 0x9e6bb99f, 0x9f52d7cb, 0x9e73a0f5, 0x1d4e1d14
data4 0x9dd05b53, 0x1f2261e4, 0x9d4ee73d, 0x1ede515e
data4 0x1f22a573, 0x9ecac348, 0x1e6a2ac0, 0x1e2787d2
data4 0x9eb64b87, 0x1f0c69c6, 0x9f470a01, 0x9d7c1686
data4 0x1e468ebe, 0x9f21ee2f, 0x9ee52116, 0x9e20f715
data4 0x1ed18533, 0x9f005b38, 0x9f20cb95, 0x1da72967
data4 0x1f1ba5d7, 0x1e2f8b16, 0x9c794f96, 0x9ca74ea3
data4 0x1f410555, 0x9eff2b96, 0x1ce8f0b1, 0x1f0cee77
data4 0x1f191edd, 0x9ed5fcbc, 0x1f30f242, 0x9e0ad369
data4 0x1ed8f3c8, 0x1f52bb0e, 0x9e9ce408, 0x1f18907f
data4 0x9ecdad40, 0x9e8af91d, 0x1d46698a, 0x9f4b93d6
data4 0x9f3f5d33, 0x1e2e52f7, 0x9f13aeec, 0x9f3b1969
data4 0x1f0996f4, 0x9f2a03df, 0x1e264767, 0x1f3ab1fb
data4 0x9f3193c9, 0x9f21ce22, 0x9eab624c, 0x9ecd8fb1
data4 0x1eaf9a85, 0x1f0c6a2c, 0x1eecbe61, 0x1f3fead9
data4 0x1f1d3a29, 0x1e9099ce, 0x1eadd875, 0x1e4dbfb8
data4 0x9dc640d2, 0x1f413680, 0x9f3f57b3, 0x1dfa1553
data4 0x1ec71c6b, 0x1e00cc00, 0x9f271e55, 0x1e5a88bb
data4 0x1f46cc2b, 0x1ee80ff9, 0x9e29c6f3, 0x1f15e229
data4 0x9ea83d66, 0x1f37408e, 0x9dacb66e, 0x1e6f6259
data4 0x9f106973, 0x1dd4e5ac, 0x1cbfdcc8, 0x9f231c9f
data4 0x9e8677e4, 0x9e9e695a, 0x1efd782b, 0x9dd26959
data4 0x9e80af69, 0x1f386fb3, 0x1f022e8c, 0x9e839967
data4 0x1ce6796f, 0x1e4c22c2, 0x1e57ef24, 0x1e919804
data4 0x9d7ea090, 0x1e40140a, 0x1f261b46, 0x1db75be2
data4 0x1f145019, 0x9e3102b9, 0x9e22507b, 0x1eae813c
data4 0x1f117e97, 0x1f282296, 0x1f3814b3, 0x1e17977b
data4 0x1f39d6ff, 0x9f1c81b9, 0x9eb5bcad, 0x1f0f596e
data4 0x1e757fd5, 0x9f090daa, 0x9f2532fc, 0x9eebafbb
data4 0x1f086556, 0x9eeedde8, 0x9f32e174, 0x1e33c030
data4 0x1f1f145a, 0x1e6e556c, 0x1e419ffb, 0x9eb6019a
data4 0x9e872a2e, 0x1e113136, 0x1e93096f, 0x1f39be40
data4 0x1f1665ad, 0x9db81d7d, 0x9cd29091, 0x1e3f4af7
data4 0x9f23176c, 0x9eccf9b3, 0x1f34fc6c, 0x9ed36894
data4 0x1ef08e06, 0x9f3b46bb, 0x9f2c850b, 0x1f1565a4
data4 0x1e887bc3, 0x1e92629c, 0x9f11ac9e, 0x9e5579f3
data4 0x1e4d5790, 0x9ee1c3d1, 0x9e916aec, 0x9eb8d9b8
data4 0x1db46105, 0x1e168663, 0x1f26a942, 0x9f0f0383
data4 0x9f079032, 0x9ecae1d8, 0x1ed3b34c, 0x9edc5ee6
data4 0x9e8a75a7, 0x1f3c3de2, 0x9ee5041e, 0x1f08c727
data4 0x1d02d7ae, 0x9f36adda, 0x9ef9a857, 0x9ef5cb3a
data4 0x9eee73da, 0x9da5d629, 0x1e0e99be, 0x1e5159b9
data4 0x1f2eac89, 0x9e8eedc5, 0x1dd0ec90, 0x1f229aff
data4 0x1ed9c3e6, 0x1e95c55a, 0x9f0c24e4, 0x1e8afed6
data4 0x1e599a96, 0x1e881b21, 0x1eab84b9, 0x9ba2bb0e
data4 0x9e33ab10, 0x1f1710b5, 0x1ebfa271, 0x9e90bbc5
data4 0x9f32515b, 0x9b32aae8, 0x1eda455c, 0x1da8186e
data4 0x9e8917ff, 0x1ec4d08e, 0x1c90069d, 0x9f2f1d29
data4 0x9ecee86d, 0x9f234d1f, 0x1f370724, 0x1da87496
data4 0x1e7959f0, 0x9e8ada34, 0x1f1c7f6f, 0x1edd576b
data4 0x9de91e8b, 0x1ec4ef89, 0x1f32078a, 0x1e9925e2
data4 0x9d8eeccb, 0x9ea3d011, 0x1f231fdf, 0x9f1dbdfa
data4 0x1e7507a3, 0x1ec42614, 0x9e8693cb, 0x9ec68398
data4 0x1d5b05fb, 0x1de32119, 0x9f003429, 0x9ec16d92
data4 0x9f095315, 0x9f119d2c, 0x9ed0c984, 0x9f090662
data4 0x9e59aa1f, 0x9ed4e64a, 0x9f2798a7, 0x9f23624d
data4 0x1e0467d9, 0x1f22e7e7, 0x1e915256, 0x9cb4df70
data4 0x9e6f687c, 0x9e3c35e5, 0x9e5757ab, 0x9f031fa1
data4 0x1f25bff7, 0x1f0e58c2, 0x1ef3ce04, 0x1f002ecb
data4 0x9ebdc836, 0x9ed657dd, 0x9f149441, 0x9e8544b2
data4 0x1cd8ff1e, 0x1e9bb463, 0x1eaa1c5c, 0x1f200c1a
data4 0x1edbfbaf, 0x1f18724d, 0x9ed63c22, 0x9f08e045
data4 0x1f13ad07, 0x9e949311, 0x9f0c50d4, 0x1e824516
data4 0x1d5e52ba, 0x1d583fbd, 0x1e3b60a9, 0x9effe6d3
data4 0x1f0d0508, 0x1f00be77, 0x9e404bfa, 0x9e1ca381
data4 0x9f084dd8, 0x9e6db85d, 0x1db698e4, 0x9ebd1871
data4 0x9ecc2679, 0x1ee68442, 0x1edb1050, 0x9dbc96a4
data4 0x9f27c1f4, 0x1c99b756, 0x1eb4400a, 0x9f24390a
data4 0x1d927875, 0x9f074faa, 0x1e9dc2c3, 0x1f13c0d2
data4 0x1e3c9685, 0x9e6b6f75, 0x9db9cb31, 0x1ea5f3aa
data4 0x9d992c61, 0x1f1015e4, 0x1f194f70, 0x9e19d2b3
data4 0x9d89116c, 0x1f23cd35, 0x1e33d3a2, 0x1ee331b8
data4 0x1d5ba7ec, 0x9f273788, 0x9e6907f4, 0x9ed5f912
data4 0x9edd458d, 0x1e2ca7b2, 0x1ef81fe4, 0x1dc7ade6
data4 0x1e876e51, 0x9f04ec89, 0x1f1da63a, 0x1ec02bd0
data4 0x9e71326f, 0x1e7847b4, 0x1f0de618, 0x9e036cb6
data4 0x1eec61e2, 0x1ef1758b, 0x9ee880a3, 0x1ed269d7
data4 0x1e27edd3, 0x9e8a81a1, 0x1eacb84d, 0x9e1aad37
data4 0x1f1aa8f7, 0x1e9bbd90, 0x1ea1b61f, 0x9ed41c2f
data4 0x1dbb5dd6, 0x1f0ec733, 0x9df06b1b, 0x1e06fef1
data4 0x9edede3a, 0x1edeb5e2, 0x1f0e63ee, 0x9db316bb
data4 0x9efc1ad3, 0x1f01fbb5, 0x9cc0d078, 0x1ea28b36
data4 0x9e9dd205, 0x9e791534, 0x1da1c8d5, 0x9e8195cc
data4 0x1f0681a4, 0x1eeaf1e2, 0x9ef83b37, 0x9f22a92b
data4 0x1eabc4ce, 0x1f10eefb, 0x1e06d9aa, 0x1e7cacd5
data4 0x1f1ea087, 0x1eb21983, 0x9f100c78, 0x1e840abe
data4 0x9efab66c, 0x1f183fa8, 0x9e84ee68, 0x9eea083d
data4 0x9ee23a74, 0x1f1351d7, 0x9ec5d42a, 0x9f071f57
data4 0x9ef578d9, 0x9f1aa7e7, 0x1eb02044, 0x1f151a2e
data4 0x9c0dc8b2, 0x9ef4087a, 0x1ec12b93, 0x1c1a946b
data4 0x1e89946f, 0x9dafe8c3, 0x1d295288, 0x9e8497ab
data4 0x1ec000c6, 0x1e102f29, 0x1e542256, 0x1e67d44d
data4 0x1ef688d8, 0x1f0e0f29, 0x1e67861f, 0x1e869748
data4 0x1ee6aa6e, 0x9e4d228b, 0x9e50be5b, 0x1e9fe225
data4 0x9ea34102, 0x9e628a3b, 0x9ed9fd83, 0x1ecd7109
data4 0x1f1864ff, 0x1ea19b76, 0x1db0d1c9, 0x9dff519b
data4 0x1e8fea71, 0x9ee82e9a, 0x9f08919b, 0x9ef5c8ae
data4 0x9ee446a4, 0x1ea59444, 0x1eb74230, 0x1ea13fbf
data4 0x9ea6a3ea, 0x1e5f2797, 0x9e0adb07, 0x9d3adadd
data4 0x1ebf2ee2, 0x1da19bfa, 0x1e8dea6d, 0x1ec4fea9
data4 0x1e669f22, 0x1dc5f919, 0x9ed25caa, 0x1ee475b1
data4 0x1ed0603e, 0x9eacb35c, 0x1dc00b27, 0x1e2f9991
data4 0x1e7b0406, 0x1eaa3387, 0x9d865bde, 0x1eb78a48
data4 0x1c40ae2e, 0x1ee9838b, 0x9f0f0d7f, 0x1e3e5d26
data4 0x1e99e7a6, 0x9e681ccf, 0x9e93ed65, 0x9eeb6a66
data4 0x1e29e9af, 0x9e96f923, 0x9e74f11d, 0x9f1474da
data4 0x1eec2ea7, 0x1ebf7aa3, 0x9c25dcca, 0x9f0553c2
data4 0x9e599efd, 0x1d2ab490, 0x1e95d7cd, 0x9ee4b20e
data4 0x9d988ce5, 0x9ef9787e, 0x9dbbba5b, 0x9f12c304
data4 0x1e3b9d70, 0x1e7bcae8, 0x9d98bb6e, 0x9e8e6b01
data4 0x9f07d03b, 0x9d67c822, 0x9f0ef69e, 0x1c7c0fe3
data4 0x9e9bfbb9, 0x9e83b84b, 0x1efbf15e, 0x9ecfa6a6
data4 0x9c91158e, 0x9ecf6770, 0x1ee1e3a8, 0x9dc95ec0
data4 0x1ef603f7, 0x1d5e52ba, 0x1c477d1b, 0x9e955cd8
data4 0x1ed665b0, 0x9e8376c4, 0x9c0ee88e, 0x1e8c989e
data4 0x1ea2df29, 0x9d961e5c, 0x1e101813, 0x1e7fffff
data4 0x9e5abff4, 0x1dbddd71, 0x1eb69100, 0x1e71f114
data4 0x1e9ca798, 0x1ef62c8d, 0x9db4e55a, 0x1dbe69ce
data4 0x9ef1c01f, 0x1f044a2a, 0x9eb9e0d7, 0x9ee59745
data4 0x9e874803, 0x1ea0b418, 0x9e13572a, 0x1ddbb3a2
data4 0x9ec0e391, 0x1e89fba1, 0x1ee8b261, 0x9e5d25f0
data4 0x9ef222cb, 0x9ef135ec, 0x1ea04b9a, 0x9f04291f
data4 0x9e969254, 0x9ee32f08, 0x9ed909d3, 0x9e362640
data4 0x9ec20735, 0x1e50131b, 0x9ed4e049, 0x1ee8e817
data4 0x1e1e09c0, 0x9ea643c5, 0x9e5a1ab6, 0x9e389059
data4 0x1e560947, 0x1d02b877, 0x1e4475ab, 0x9ea9aaf6
data4 0x1e95bc5e, 0x1eaf6afd, 0x1d43067d, 0x9d043821
data4 0x9e97baa9, 0x1de5c4f9, 0x9e9a0069, 0x9e1b9944
data4 0x1eb13686, 0x9eb907eb, 0x1e059589, 0x1cbd0f93
data4 0x9eb7e6ae, 0x1e9fa175, 0x1ee5bdf4, 0x1e8052f7
data4 0x9c80d1e3, 0x1bfbe28e, 0x9e672b3b, 0x9ecacf19
data4 0x9e3c04be, 0x1dfe8c5c, 0x1e1ba9cb, 0x1eb40b1e
data4 0x1ec7e7f6, 0x9d0d45b3, 0x1ef0113b, 0x9a155fa3
data4 0x1e28ec3b, 0x1e7ca8df, 0x9d2f91b4, 0x1eccd9ed
data4 0x9ed943bc, 0x9ccaab19, 0x9e8a5c58, 0x1ec3bca8
data4 0x1ed78dc7, 0x9ed391a8, 0x9e938f6e, 0x9ec4a030
data4 0x9e80346e, 0x1e7a4686, 0x9e284315, 0x9e39584c
data4 0x1ebdc9b4, 0x9e9cfce5, 0x9ef55c65, 0x1e2941e7
data4 0x9efbe59f, 0x1d87c41b, 0x1e40befc, 0x1e3d05b5
data4 0x1de9ea67, 0x1ec9a21c, 0x1decb69a, 0x1df6e75a
data4 0x9e8030ab, 0x9db20540, 0x9ef1e977, 0x1e3cdc43
data4 0x1e0492b0, 0x9e91d872, 0x1e775346, 0x9e939978
data4 0x1eb2714e, 0x1e49a203, 0x9e10195a, 0x1ef1ffc3
data4 0x9ea8b709, 0x9e832e27, 0x1ed5ac3b, 0x1edb20a6
data4 0x1e4dbd4e, 0x1efbb932, 0x1d8170ec, 0x1e6c4849
data4 0x1f008e17, 0x1e8000c4, 0x1d855ecf, 0x9e37cb85
data4 0x1ecffdf5, 0x1eba6519, 0x9edbe600, 0x1ea3e5e7
data4 0x1ed4fb39, 0x1f00be77, 0x1e6f4484, 0x9e9e7107
data4 0x9e30b29d, 0x9ee6e174, 0x1e3a2656, 0x9dd72f3f
data4 0x9ee12138, 0x1ed16fed, 0x9ece8a02, 0x9ca5b249
data4 0x9eafd508, 0x9ef0e9fc, 0x1d1307ac, 0x1eecee20
data4 0x1cf60c6f, 0x9d556216, 0x9eaed175, 0x9ec919f4
data4 0x1ec2c988, 0x1cd82772, 0x9dc99456, 0x1eab0467
data4 0x1e89b36f, 0x1c757944, 0x1eef9abd, 0x9e98664d
ASM_SIZE_DIRECTIVE(D_table)
.align 32
.global cbrtl#
.section .text
.proc cbrtl#
.align 32
// long double cbrtl(long double x)
//
// In:    f8 = x
// Out:   f8 = result
// Uses:  f6-f15, r2-r3, r23-r30, p6, p7, p12
//
// Inputs that fclass does not report as normal/denormal, and inputs
// whose significand is zero, are returned as x*1+0 right away.
// Otherwise, with a = x normalized and expon = biased exponent of a:
//   y      = frcpa(x)
//   r      = 1 - a*y
//   P      = D - C_1 r - C_2 r^2 - r^3 (C_3 + C_4 r + C_5 r^2 + C_6 r^3)
//   T      = sign * 2^floor(expon/3) * T_table[idx]
//   result = T + T*P
// where idx = ((expon mod 3) << 8) + (top 8 fraction bits of a) selects
// the T_table and D_table entries.  Everything after frcpa is predicated
// on p6, the predicate frcpa writes.
// Note: in the comments below "r2" and "r3" in formulas such as
// "P_5=P_2+r2*P_3" denote r^2 and r^3, not the general registers r2/r3.
cbrtl:
{ .mfi
// r3 = significand of x (tested for zero below)
getf.sig r3=f8
// p12 = x is NOT a normal/denormal number (take the early return),
// p7  = x is a normal/denormal number (continue)
(p0) fclass.nm.unc p12,p7 = f8, 0x1b
// r2 = address of the linkage-table slot for poly_coeffs
// (C_1...C_6, followed in memory by T_table)
addl r2 = @ltoff(poly_coeffs), gp;;
}
{.mfi
// r29 = bias used when building the 2^floor(expon/3) scale factor:
// 2/3*bias - 63 = 0xaaaa - 0x3f = 0xaa6b
// NOTE(review): the -63 presumably offsets the 2^63 scaling of the
// integer significand that ldf8 loads for T -- confirm.
mov r29=0xaa6b
// f14 = a = x*1+0, i.e. x normalized
fma.s1 f14=f8,f1,f0
// r27 = address of the linkage-table slot for D_table
addl r27 = @ltoff(D_table), gp;;
}
{.mib
nop.m 0
// normal/denormal class only: also take the early return when the
// significand of x is zero
(p7) cmp.eq p12,p0=r3,r0
nop.b 0;;
}
{.mfb
// r2 = start address of C_1...C_6 (followed by T_table)
ld8 r2=[r2]
// early return path: result = x*1+0
(p12) fma.s0 f8=f8,f1,f0
(p12) br.ret.spnt b0;;
}
{.mmf
// f7 = C_1; r2 advances to C_2
ldfe f7=[r2],16
// r27 = start address of D_table
ld8 r27=[r27]
// f8 = y = frcpa(x), overwriting the input (a is kept in f14);
// p6 is written by frcpa and gates the rest of the computation
frcpa.s0 f8,p6=f1,f8;;
}
{.mmi
// f9 = C_2; r2 advances to C_3
ldfe f9=[r2],16;;
// f10 = C_3, f11 = C_4; r2 advances to C_5
ldfpd f10,f11=[r2],16
nop.i 0;;
}
{.mmi
// r23 = significand of the normalized input a
getf.sig r23=f14
// r24 = sign and biased exponent of a
getf.exp r24=f14
// r25 = mask selecting the sign bit of the getf.exp result
mov r25=0x20000;;
}
{.mii
// r26 = sign bit of a
and r26=r24,r25
// shift the leading 1 out of the significand (start of 2nd table index)
shl r23=r23,1
// r25 = expon = biased exponent of a with the sign bit cleared
andcm r25=r24,r25;;
}
{.mfi
// f12 = C_5, f13 = C_6; r2 advances past poly_coeffs to T_table
(p6) ldfpd f12,f13=[r2],16
// f6 = r = 1-a*y
(p6) fnma.s1 f6=f8,f14,f1
// Start of expon*0x55556 (0x5555 = (2^16-1)/3): r24 = 5*expon
shladd r24=r25,2,r25;;
}
{.mib
// r30 = (5*expon)*16 = 80*expon
shladd r30=r24,4,r0
// r28 = 3*expon
shladd r28=r25,1,r25
nop.b 0;;
}
{.mmi
// r28 = 6*expon
shladd r28=r28,1,r0
// r24 = 5*expon + 80*expon = 85*expon = 0x55*expon
add r24=r24,r30
// r23 = 2nd table index: the 8 fraction bits below the leading 1
shr.u r23=r23,56;;
}
{.mmi
// adjust T_table pointer by 2nd index (8-byte entries)
shladd r2=r23,3,r2
// adjust D_table pointer by 2nd index (4-byte entries)
shladd r27=r23,2,r27
// r30 = (0x55*expon)*2^8
shl r30=r24,8;;
}
{.mmi
// r24 = 0x5555*expon = expon*(2^16-1)/3
add r24=r24,r30;;
// r24 = 16*0x5555*expon + 6*expon = expon*0x55556 = expon*(2^20+2)/3
shladd r24=r24,4,r28
nop.i 0;;
}
{.mii
nop.m 0
// r24 = floor(expon/3)
shr.u r24=r24,20
nop.i 0;;
}
{.mmi
nop.m 0
// r28 = 3*floor(expon/3); this reads r24 in the same instruction group
// as the add below, so it sees the value from before that add
shladd r28=r24,1,r24
// r24 = floor(expon/3) + r29 (biased exponent of the scale factor)
add r24=r29,r24;;
}
{.mmi
// r25 = expon - 3*floor(expon/3) = expon mod 3
sub r25=r25,r28;;
// merge the sign bit of a into the scale-factor exponent
or r24=r24,r26
// r25 = 1st table index = (expon mod 3) << 8
shl r25=r25,8;;
}
{.mfi
// adjust D_table pointer by 1st index (4-byte entries)
shladd r27=r25,2,r27
// f7 = P_1 = C_1+C_2*r
(p6) fma.s1 f7=f9,f6,f7
// adjust T_table pointer by 1st index (8-byte entries)
shladd r2=r25,3,r2
}
{.mfi
// f14 = sign*2^{floor(expon/3)} scale factor (a is no longer needed)
(p6) setf.exp f14=r24
// f9 = r^2
(p6) fma.s1 f9=f6,f6,f0
nop.i 0;;
}
{.mfi
// f15 = D (single-precision correction from D_table)
(p6) ldfs f15=[r27]
// f10 = P_2 = C_3+C_4*r
(p6) fma.s1 f10=f11,f6,f10
nop.i 0
}
{.mfi
// f8 = T (64-bit significand from T_table); replaces y = frcpa(x)
(p6) ldf8 f8=[r2]
// f12 = P_3 = C_5+C_6*r
(p6) fma.s1 f12=f13,f6,f12
nop.i 0;;
}
{.mfi
nop.m 0
// f15 = P_4 = D-r*P_1
(p6) fnma.s1 f15=f6,f7,f15
nop.i 0
}
{.mfi
nop.m 0
// f6 = r^3 = r*r^2 (f6 held r until here)
(p6) fma.s1 f6=f6,f9,f0
nop.i 0;;
}
{.mfi
nop.m 0
// f10 = P_5 = P_2+r^2*P_3
(p6) fma.s1 f10=f9,f12,f10
nop.i 0;;
}
{.mfi
nop.m 0
// f8 = T = T*(sign*2^{floor(expon/3)})
(p6) fma.s1 f8=f8,f14,f0
nop.i 0
}
{.mfi
nop.m 0
// f6 = P = P_4-r^3*P_5
(p6) fnma.s1 f6=f6,f10,f15
nop.i 0;;
}
{.mfb
nop.m 0
// result = T+T*P; when p6 is clear none of the predicated instructions
// ran and f8 still holds what frcpa wrote
(p6) fma.s0 f8=f8,f6,f8
br.ret.sptk b0;;
}
.endp cbrtl
ASM_SIZE_DIRECTIVE(cbrtl)

249
sysdeps/ia64/fpu/s_ceil.S Normal file
View File

@ -0,0 +1,249 @@
.file "ceil.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
#include "libm_support.h"
// Export ceil and open its procedure record in the text section.
.align 32
.global ceil#
.section .text
.proc ceil#
.align 32
// History
//==============================================================
// 2/02/00: Initial version
// 6/13/00: Improved speed
// 6/27/00: Eliminated incorrect invalid flag setting
// API
//==============================================================
// double ceil(double x)
// The argument is read from f8 and the result is written to f8.
// general input registers:
// all-ones integer pattern, transferred into CEIL_FFFF with setf.sig
ceil_GR_FFFF = r14
// sign and exponent field of the input, read with getf.exp
ceil_GR_signexp = r15
// ceil_GR_signexp ANDed with ceil_GR_expmask
ceil_GR_exponent = r16
// constant mask 0x1FFFF
ceil_GR_expmask = r17
// exponent threshold at or above which the input is already integral
ceil_GR_bigexp = r18
// predicate registers used:
// p6 ==> Input is NaN, infinity, zero
// p7 ==> Input is denormal
// p8 ==> Input is <0
// p9 ==> Input is >=0
// p10 ==> Input is already an integer (bigger than largest integer)
// p11 ==> Input is not a large integer
// p12 ==> Input is a smaller integer
// p13 ==> Input is not an integer, so inexact must be set
// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
// floating-point registers used:
// zero carrying the sign of the input
CEIL_SIGNED_ZERO = f7
// normalized copy of the input
CEIL_NORM_f8 = f9
// all-ones significand; multiplied by itself in s0 to raise inexact
CEIL_FFFF = f10
// destination of the inexact-raising multiply; the value is never read
CEIL_INEXACT = f11
// trunc(x) converted back to floating point
CEIL_FLOAT_INT_f8 = f12
// trunc(x) in integer form, produced by fcvt.fx.trunc
CEIL_INT_f8 = f13
// adjustment added to trunc(x): 1.0 under p9, 0.0 under p8
CEIL_adj = f14
// constant -1.0
CEIL_MINUS_ONE = f15
// Overview of operation
//==============================================================
// double ceil(double x)
// Return an integer value (represented as a double) that is the smallest
// value not less than x
// This is x rounded toward +infinity to an integral value.
// Inexact is set if x != ceil(x)
// **************************************************************************
// Set denormal flag for denormal input
// and take denormal fault if necessary.
// Is the input an integer value already?
// double_extended
// if the exponent is >= 1003e => 3F(true) = 63(decimal)
// we have a significand of 64 bits 1.63-bits.
// If we multiply by 2^63, we no longer have a fractional part
// So input is an integer value already.
// double
// if the exponent is >= 10033 => 34(true) = 52(decimal)
// 34 + 3ff = 433
// we have a significand of 53 bits 1.52-bits. (implicit 1)
// If we multiply by 2^52, we no longer have a fractional part
// So input is an integer value already.
// single
// if the exponent is >= 10016 => 17(true) = 23(decimal)
// we have a significand of 24 bits 1.23-bits. (implicit 1)
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
// If x is NAN, ZERO, or INFINITY, then return
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 1 11 0xe7
// double ceil(double x): x is read from f8, the result is written to f8.
// Strategy: truncate x toward zero, add 1.0 when x is not negative, and
// fall back to x itself when x turns out to be integral already.
ceil:
// Start the truncating conversion of x right away; its result is only
// consumed on the p11 path. 0x10033 is the double threshold described in
// the overview (true exponent 0x34 = 52).
{ .mfi
getf.exp ceil_GR_signexp = f8
fcvt.fx.trunc.s1 CEIL_INT_f8 = f8
addl ceil_GR_bigexp = 0x10033, r0
}
// p8 = (x < 0), p9 = complement of p8
{ .mfi
addl ceil_GR_FFFF = -1,r0
fcmp.lt.s1 p8,p9 = f8,f0
mov ceil_GR_expmask = 0x1FFFF ;;
}
// p7 ==> denorm
{ .mfi
setf.sig CEIL_FFFF = ceil_GR_FFFF
fclass.m p7,p0 = f8, 0x0b
nop.i 999
}
// Normalized copy of x; used for the later comparisons and as the result
// whenever x is already an integer.
{ .mfi
nop.m 999
fnorm CEIL_NORM_f8 = f8
nop.i 999 ;;
}
// Form 0 with sign of input in case negative zero is needed
{ .mfi
nop.m 999
fmerge.s CEIL_SIGNED_ZERO = f8, f0
nop.i 999
}
// CEIL_MINUS_ONE = 0 - 1 = -1.0
{ .mfi
nop.m 999
fsub.s1 CEIL_MINUS_ONE = f0, f1
nop.i 999 ;;
}
// p6 ==> NAN, INF, ZERO
// p10 is the complement of p6 here; it is refined by the exponent test below.
// Denormal inputs detour through CEIL_DENORM to redo the exponent read and
// the conversion on the normalized value.
{ .mfb
nop.m 999
fclass.m p6,p10 = f8, 0xe7
(p7) br.cond.spnt L(CEIL_DENORM) ;;
}
L(CEIL_COMMON):
.pred.rel "mutex",p8,p9
// Set adjustment to add to trunc(x) for result
// If x>0, adjustment is 1.0
// If x<=0, adjustment is 0.0
{ .mfi
and ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
(p9) fadd.s1 CEIL_adj = f1,f0
nop.i 999
}
{ .mfi
nop.m 999
(p8) fadd.s1 CEIL_adj = f0,f0
nop.i 999 ;;
}
// Only for inputs outside the NaN/infinity/zero class:
//   p10 = exponent >= threshold (already integral), p11 = otherwise.
// This is an unconditional-form compare, so when the qualifying p10 is
// clear (p6 case) both p10 and p11 end up clear.
// Under p6 the result is x itself rounded to double.
{ .mfi
(p10) cmp.ge.unc p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
(p6) fnorm.d f8 = f8
nop.i 999 ;;
}
// Convert trunc(x) back to floating point for the general path.
{ .mfi
nop.m 999
(p11) fcvt.xf CEIL_FLOAT_INT_f8 = CEIL_INT_f8
nop.i 999 ;;
}
// Large magnitude: x is already an integer, return it unchanged.
{ .mfi
nop.m 999
(p10) fnorm.d f8 = CEIL_NORM_f8
nop.i 999 ;;
}
// Is -1 < x < 0? If so, result will be -0. Special case it with p14 set.
{ .mfi
nop.m 999
(p8) fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
nop.i 999 ;;
}
// Under p14: clear p11 (r0 != r0 is always false) so the general path is
// skipped, return the signed zero, and raise inexact with a dummy multiply.
{ .mfi
(p14) cmp.ne p11,p0 = r0,r0
(p14) fnorm.d f8 = CEIL_SIGNED_ZERO
nop.i 999
}
{ .mfi
nop.m 999
(p14) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
nop.i 999 ;;
}
// General path: tentative result = trunc(x) + adjustment.
{ .mfi
nop.m 999
(p11) fadd.d f8 = CEIL_FLOAT_INT_f8,CEIL_adj
nop.i 999 ;;
}
// p12 = (trunc(x) == x), i.e. x was an integer; p13 = x had a fraction.
{ .mfi
nop.m 999
(p11) fcmp.eq.unc.s1 p12,p13 = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
nop.i 999 ;;
}
// Set inexact if result not equal to input
{ .mfi
nop.m 999
(p13) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
nop.i 999
}
// Set result to input if integer
// (this overwrites the tentative trunc(x) + adjustment computed above)
{ .mfb
nop.m 999
(p12) fnorm.d f8 = CEIL_NORM_f8
br.ret.sptk b0 ;;
}
// Here if input denorm
// Redo the exponent read and the truncating conversion from the normalized
// copy, then rejoin the common path.
L(CEIL_DENORM):
{ .mfb
getf.exp ceil_GR_signexp = CEIL_NORM_f8
fcvt.fx.trunc.s1 CEIL_INT_f8 = CEIL_NORM_f8
br.cond.sptk L(CEIL_COMMON) ;;
}
.endp ceil
ASM_SIZE_DIRECTIVE(ceil)

249
sysdeps/ia64/fpu/s_ceilf.S Normal file
View File

@ -0,0 +1,249 @@
.file "ceilf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
#include "libm_support.h"
// Export ceilf and open its procedure record in the text section.
.align 32
.global ceilf#
.section .text
.proc ceilf#
.align 32
// History
//==============================================================
// 2/02/00: Initial version
// 6/13/00: Improved speed
// 6/27/00: Eliminated incorrect invalid flag setting
// API
//==============================================================
// float ceilf(float x)
// The argument is read from f8 and the result is written to f8.
// general input registers:
// all-ones integer pattern, transferred into CEIL_FFFF with setf.sig
ceil_GR_FFFF = r14
// sign and exponent field of the input, read with getf.exp
ceil_GR_signexp = r15
// ceil_GR_signexp ANDed with ceil_GR_expmask
ceil_GR_exponent = r16
// constant mask 0x1FFFF
ceil_GR_expmask = r17
// exponent threshold at or above which the input is already integral
ceil_GR_bigexp = r18
// predicate registers used:
// p6 ==> Input is NaN, infinity, zero
// p7 ==> Input is denormal
// p8 ==> Input is <0
// p9 ==> Input is >=0
// p10 ==> Input is already an integer (bigger than largest integer)
// p11 ==> Input is not a large integer
// p12 ==> Input is a smaller integer
// p13 ==> Input is not an integer, so inexact must be set
// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
// floating-point registers used:
// zero carrying the sign of the input
CEIL_SIGNED_ZERO = f7
// normalized copy of the input
CEIL_NORM_f8 = f9
// all-ones significand; multiplied by itself in s0 to raise inexact
CEIL_FFFF = f10
// destination of the inexact-raising multiply; the value is never read
CEIL_INEXACT = f11
// trunc(x) converted back to floating point
CEIL_FLOAT_INT_f8 = f12
// trunc(x) in integer form, produced by fcvt.fx.trunc
CEIL_INT_f8 = f13
// adjustment added to trunc(x): 1.0 under p9, 0.0 under p8
CEIL_adj = f14
// constant -1.0
CEIL_MINUS_ONE = f15
// Overview of operation
//==============================================================
// float ceilf(float x)
// Return an integer value (represented as a float) that is the smallest
// value not less than x
// This is x rounded toward +infinity to an integral value.
// Inexact is set if x != ceilf(x)
// **************************************************************************
// Set denormal flag for denormal input
// and take denormal fault if necessary.
// Is the input an integer value already?
// double_extended
// if the exponent is >= 1003e => 3F(true) = 63(decimal)
// we have a significand of 64 bits 1.63-bits.
// If we multiply by 2^63, we no longer have a fractional part
// So input is an integer value already.
// double
// if the exponent is >= 10033 => 34(true) = 52(decimal)
// 34 + 3ff = 433
// we have a significand of 53 bits 1.52-bits. (implicit 1)
// If we multiply by 2^52, we no longer have a fractional part
// So input is an integer value already.
// single
// if the exponent is >= 10016 => 17(true) = 23(decimal)
// we have a significand of 24 bits 1.23-bits. (implicit 1)
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
// If x is NAN, ZERO, or INFINITY, then return
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 1 11 0xe7
// float ceilf(float x): x is read from f8, the result is written to f8.
// Strategy: truncate x toward zero, add 1.0 when x is not negative, and
// fall back to x itself when x turns out to be integral already.
ceilf:
// Start the truncating conversion of x right away; its result is only
// consumed on the p11 path. 0x10016 is the single threshold described in
// the overview (true exponent 0x17 = 23).
{ .mfi
getf.exp ceil_GR_signexp = f8
fcvt.fx.trunc.s1 CEIL_INT_f8 = f8
addl ceil_GR_bigexp = 0x10016, r0
}
// p8 = (x < 0), p9 = complement of p8
{ .mfi
addl ceil_GR_FFFF = -1,r0
fcmp.lt.s1 p8,p9 = f8,f0
mov ceil_GR_expmask = 0x1FFFF ;;
}
// p7 ==> denorm
{ .mfi
setf.sig CEIL_FFFF = ceil_GR_FFFF
fclass.m p7,p0 = f8, 0x0b
nop.i 999
}
// Normalized copy of x; used for the later comparisons and as the result
// whenever x is already an integer.
{ .mfi
nop.m 999
fnorm CEIL_NORM_f8 = f8
nop.i 999 ;;
}
// Form 0 with sign of input in case negative zero is needed
{ .mfi
nop.m 999
fmerge.s CEIL_SIGNED_ZERO = f8, f0
nop.i 999
}
// CEIL_MINUS_ONE = 0 - 1 = -1.0
{ .mfi
nop.m 999
fsub.s1 CEIL_MINUS_ONE = f0, f1
nop.i 999 ;;
}
// p6 ==> NAN, INF, ZERO
// p10 is the complement of p6 here; it is refined by the exponent test below.
// Denormal inputs detour through CEIL_DENORM to redo the exponent read and
// the conversion on the normalized value.
{ .mfb
nop.m 999
fclass.m p6,p10 = f8, 0xe7
(p7) br.cond.spnt L(CEIL_DENORM) ;;
}
L(CEIL_COMMON):
.pred.rel "mutex",p8,p9
// Set adjustment to add to trunc(x) for result
// If x>0, adjustment is 1.0
// If x<=0, adjustment is 0.0
{ .mfi
and ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
(p9) fadd.s1 CEIL_adj = f1,f0
nop.i 999
}
{ .mfi
nop.m 999
(p8) fadd.s1 CEIL_adj = f0,f0
nop.i 999 ;;
}
// Only for inputs outside the NaN/infinity/zero class:
//   p10 = exponent >= threshold (already integral), p11 = otherwise.
// This is an unconditional-form compare, so when the qualifying p10 is
// clear (p6 case) both p10 and p11 end up clear.
// Under p6 the result is x itself rounded to single.
{ .mfi
(p10) cmp.ge.unc p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
(p6) fnorm.s f8 = f8
nop.i 999 ;;
}
// Convert trunc(x) back to floating point for the general path.
{ .mfi
nop.m 999
(p11) fcvt.xf CEIL_FLOAT_INT_f8 = CEIL_INT_f8
nop.i 999 ;;
}
// Large magnitude: x is already an integer, return it unchanged.
{ .mfi
nop.m 999
(p10) fnorm.s f8 = CEIL_NORM_f8
nop.i 999 ;;
}
// Is -1 < x < 0? If so, result will be -0. Special case it with p14 set.
{ .mfi
nop.m 999
(p8) fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
nop.i 999 ;;
}
// Under p14: clear p11 (r0 != r0 is always false) so the general path is
// skipped, return the signed zero, and raise inexact with a dummy multiply.
{ .mfi
(p14) cmp.ne p11,p0 = r0,r0
(p14) fnorm.s f8 = CEIL_SIGNED_ZERO
nop.i 999
}
{ .mfi
nop.m 999
(p14) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
nop.i 999 ;;
}
// General path: tentative result = trunc(x) + adjustment.
{ .mfi
nop.m 999
(p11) fadd.s f8 = CEIL_FLOAT_INT_f8,CEIL_adj
nop.i 999 ;;
}
// p12 = (trunc(x) == x), i.e. x was an integer; p13 = x had a fraction.
{ .mfi
nop.m 999
(p11) fcmp.eq.unc.s1 p12,p13 = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
nop.i 999 ;;
}
// Set inexact if result not equal to input
{ .mfi
nop.m 999
(p13) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
nop.i 999
}
// Set result to input if integer
// (this overwrites the tentative trunc(x) + adjustment computed above)
{ .mfb
nop.m 999
(p12) fnorm.s f8 = CEIL_NORM_f8
br.ret.sptk b0 ;;
}
// Here if input denorm
// Redo the exponent read and the truncating conversion from the normalized
// copy, then rejoin the common path.
L(CEIL_DENORM):
{ .mfb
getf.exp ceil_GR_signexp = CEIL_NORM_f8
fcvt.fx.trunc.s1 CEIL_INT_f8 = CEIL_NORM_f8
br.cond.sptk L(CEIL_COMMON) ;;
}
.endp ceilf
ASM_SIZE_DIRECTIVE(ceilf)

249
sysdeps/ia64/fpu/s_ceill.S Normal file
View File

@ -0,0 +1,249 @@
.file "ceill.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
#include "libm_support.h"
// Export ceill and open its procedure record in the text section.
.align 32
.global ceill#
.section .text
.proc ceill#
.align 32
// History
//==============================================================
// 2/02/00: Initial version
// 6/13/00: Improved speed
// 6/27/00: Eliminated incorrect invalid flag setting
// API
//==============================================================
// long double ceill(long double x)
// The argument is read from f8 and the result is written to f8.
// general input registers:
// all-ones integer pattern, transferred into CEIL_FFFF with setf.sig
ceil_GR_FFFF = r14
// sign and exponent field of the input, read with getf.exp
ceil_GR_signexp = r15
// ceil_GR_signexp ANDed with ceil_GR_expmask
ceil_GR_exponent = r16
// constant mask 0x1FFFF
ceil_GR_expmask = r17
// exponent threshold at or above which the input is already integral
ceil_GR_bigexp = r18
// predicate registers used:
// p6 ==> Input is NaN, infinity, zero
// p7 ==> Input is denormal
// p8 ==> Input is <0
// p9 ==> Input is >=0
// p10 ==> Input is already an integer (bigger than largest integer)
// p11 ==> Input is not a large integer
// p12 ==> Input is a smaller integer
// p13 ==> Input is not an integer, so inexact must be set
// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
// floating-point registers used:
// zero carrying the sign of the input
CEIL_SIGNED_ZERO = f7
// normalized copy of the input
CEIL_NORM_f8 = f9
// all-ones significand; multiplied by itself in s0 to raise inexact
CEIL_FFFF = f10
// destination of the inexact-raising multiply; the value is never read
CEIL_INEXACT = f11
// trunc(x) converted back to floating point
CEIL_FLOAT_INT_f8 = f12
// trunc(x) in integer form, produced by fcvt.fx.trunc
CEIL_INT_f8 = f13
// adjustment added to trunc(x): 1.0 under p9, 0.0 under p8
CEIL_adj = f14
// constant -1.0
CEIL_MINUS_ONE = f15
// Overview of operation
//==============================================================
// long double ceill(long double x)
// Return an integer value (represented as a long double) that is the smallest
// value not less than x
// This is x rounded toward +infinity to an integral value.
// Inexact is set if x != ceill(x)
// **************************************************************************
// Set denormal flag for denormal input
// and take denormal fault if necessary.
// Is the input an integer value already?
// double_extended
// if the exponent is >= 1003e => 3F(true) = 63(decimal)
// we have a significand of 64 bits 1.63-bits.
// If we multiply by 2^63, we no longer have a fractional part
// So input is an integer value already.
// double
// if the exponent is >= 10033 => 34(true) = 52(decimal)
// 34 + 3ff = 433
// we have a significand of 53 bits 1.52-bits. (implicit 1)
// If we multiply by 2^52, we no longer have a fractional part
// So input is an integer value already.
// single
// if the exponent is >= 10016 => 17(true) = 23(decimal)
// we have a significand of 24 bits 1.23-bits. (implicit 1)
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
// If x is NAN, ZERO, or INFINITY, then return
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 1 11 0xe7
// long double ceill(long double x): x is read from f8, the result is
// written to f8.
// Strategy: truncate x toward zero, add 1.0 when x is not negative, and
// fall back to x itself when x turns out to be integral already.
ceill:
// Start the truncating conversion of x right away; its result is only
// consumed on the p11 path. 0x1003e is the double_extended threshold
// described in the overview (true exponent 0x3F = 63).
{ .mfi
getf.exp ceil_GR_signexp = f8
fcvt.fx.trunc.s1 CEIL_INT_f8 = f8
addl ceil_GR_bigexp = 0x1003e, r0
}
// p8 = (x < 0), p9 = complement of p8
{ .mfi
addl ceil_GR_FFFF = -1,r0
fcmp.lt.s1 p8,p9 = f8,f0
mov ceil_GR_expmask = 0x1FFFF ;;
}
// p7 ==> denorm
{ .mfi
setf.sig CEIL_FFFF = ceil_GR_FFFF
fclass.m p7,p0 = f8, 0x0b
nop.i 999
}
// Normalized copy of x; used for the later comparisons and as the result
// whenever x is already an integer.
{ .mfi
nop.m 999
fnorm CEIL_NORM_f8 = f8
nop.i 999 ;;
}
// Form 0 with sign of input in case negative zero is needed
{ .mfi
nop.m 999
fmerge.s CEIL_SIGNED_ZERO = f8, f0
nop.i 999
}
// CEIL_MINUS_ONE = 0 - 1 = -1.0
{ .mfi
nop.m 999
fsub.s1 CEIL_MINUS_ONE = f0, f1
nop.i 999 ;;
}
// p6 ==> NAN, INF, ZERO
// p10 is the complement of p6 here; it is refined by the exponent test below.
// Denormal inputs detour through CEIL_DENORM to redo the exponent read and
// the conversion on the normalized value.
{ .mfb
nop.m 999
fclass.m p6,p10 = f8, 0xe7
(p7) br.cond.spnt L(CEIL_DENORM) ;;
}
L(CEIL_COMMON):
.pred.rel "mutex",p8,p9
// Set adjustment to add to trunc(x) for result
// If x>0, adjustment is 1.0
// If x<=0, adjustment is 0.0
{ .mfi
and ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
(p9) fadd.s1 CEIL_adj = f1,f0
nop.i 999
}
{ .mfi
nop.m 999
(p8) fadd.s1 CEIL_adj = f0,f0
nop.i 999 ;;
}
// Only for inputs outside the NaN/infinity/zero class:
//   p10 = exponent >= threshold (already integral), p11 = otherwise.
// This is an unconditional-form compare, so when the qualifying p10 is
// clear (p6 case) both p10 and p11 end up clear.
// Under p6 the result is x itself (no precision completer is given on the
// fnorm in this long double variant).
{ .mfi
(p10) cmp.ge.unc p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
(p6) fnorm f8 = f8
nop.i 999 ;;
}
// Convert trunc(x) back to floating point for the general path.
{ .mfi
nop.m 999
(p11) fcvt.xf CEIL_FLOAT_INT_f8 = CEIL_INT_f8
nop.i 999 ;;
}
// Large magnitude: x is already an integer, return it unchanged.
{ .mfi
nop.m 999
(p10) fnorm f8 = CEIL_NORM_f8
nop.i 999 ;;
}
// Is -1 < x < 0? If so, result will be -0. Special case it with p14 set.
{ .mfi
nop.m 999
(p8) fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
nop.i 999 ;;
}
// Under p14: clear p11 (r0 != r0 is always false) so the general path is
// skipped, return the signed zero, and raise inexact with a dummy multiply.
{ .mfi
(p14) cmp.ne p11,p0 = r0,r0
(p14) fnorm f8 = CEIL_SIGNED_ZERO
nop.i 999
}
{ .mfi
nop.m 999
(p14) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
nop.i 999 ;;
}
// General path: tentative result = trunc(x) + adjustment.
{ .mfi
nop.m 999
(p11) fadd f8 = CEIL_FLOAT_INT_f8,CEIL_adj
nop.i 999 ;;
}
// p12 = (trunc(x) == x), i.e. x was an integer; p13 = x had a fraction.
{ .mfi
nop.m 999
(p11) fcmp.eq.unc.s1 p12,p13 = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
nop.i 999 ;;
}
// Set inexact if result not equal to input
{ .mfi
nop.m 999
(p13) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
nop.i 999
}
// Set result to input if integer
// (this overwrites the tentative trunc(x) + adjustment computed above)
{ .mfb
nop.m 999
(p12) fnorm f8 = CEIL_NORM_f8
br.ret.sptk b0 ;;
}
// Here if input denorm
// Redo the exponent read and the truncating conversion from the normalized
// copy, then rejoin the common path.
L(CEIL_DENORM):
{ .mfb
getf.exp ceil_GR_signexp = CEIL_NORM_f8
fcvt.fx.trunc.s1 CEIL_INT_f8 = CEIL_NORM_f8
br.cond.sptk L(CEIL_COMMON) ;;
}
.endp ceill
ASM_SIZE_DIRECTIVE(ceill)

3488
sysdeps/ia64/fpu/s_cos.S Normal file

File diff suppressed because it is too large Load Diff

686
sysdeps/ia64/fpu/s_cosf.S Normal file
View File

@ -0,0 +1,686 @@
.file "sincosf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
// History
//==============================================================
// 2/02/00 Initial revision
// 4/02/00 Unwind support added.
// 5/10/00 Improved speed with new algorithm.
// 8/08/00 Improved speed by avoiding SIR flush.
// 8/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
// 8/30/00 Put sin_of_r before sin_tbl_S_cos_of_r to gain a cycle
// 1/02/00 Fixed flag settings, improved speed.
//
// API
//==============================================================
// float sinf( float x);
// float cosf( float x);
//
#include "libm_support.h"
// Assembly macros
//==============================================================
// Symbolic register names shared by the sinf and cosf code in this file.
// SIN_Sin_Flag = p6
// SIN_Cos_Flag = p7
// integer registers used
// SIN_AD_PQ_1 and SIN_AD_PQ_2 are two names for the same register (r33):
// first the walking pointer through the coefficient words, later the
// address of the selected sin/cos table entry.
SIN_AD_PQ_1 = r33
SIN_AD_PQ_2 = r33
// 0 when entered through cosf, 1 when entered through sinf
sin_GR_sincos_flag = r34
// integer M obtained from converting x with fcvt.fx
sin_GR_Mint = r35
// byte offset of the table entry for M: (M + 32) << 4
sin_GR_index = r36
// gr_tmp and GR_SAVE_B0 are two names for the same register (r37)
gr_tmp = r37
GR_SAVE_B0 = r37
GR_SAVE_GP = r38
GR_SAVE_PFS = r39
// floating point registers used
// polynomial coefficients loaded from sin_coeff_1_table
sin_coeff_P1 = f32
sin_coeff_P2 = f33
sin_coeff_Q1 = f34
sin_coeff_Q2 = f35
sin_coeff_P4 = f36
sin_coeff_P5 = f37
sin_coeff_Q3 = f38
sin_coeff_Q4 = f39
// M in integer form, and M converted back to floating point
sin_Mx = f40
sin_Mfloat = f41
// table values for M (which one holds sin(M) and which cos(M) depends on
// the load order chosen for sin versus cos)
sin_tbl_S = f42
sin_tbl_C = f43
// reduced argument r = x - M and its powers: t = r^2, rcube = r^3,
// tsq = r^4, r7 = r^7
sin_r = f44
sin_rcube = f45
sin_tsq = f46
sin_r7 = f47
sin_t = f48
// intermediate and final polynomial values
sin_poly_p2 = f49
sin_poly_p1 = f50
// holds an all-ones significand; squared in s0 to raise inexact
fp_tmp = f51
sin_poly_p3 = f52
sin_poly_p4 = f53
sin_of_r = f54
sin_S_t = f55
sin_poly_q2 = f56
sin_poly_q1 = f57
sin_S_tcube = f58
sin_poly_q3 = f59
sin_poly_q4 = f60
sin_tbl_S_tcube = f61
sin_tbl_S_cos_of_r = f62
sin_coeff_Q5 = f63
sin_coeff_Q6 = f64
sin_coeff_P3 = f65
sin_poly_q5 = f66
sin_poly_q12 = f67
sin_poly_q3456 = f68
// destination of the dummy multiply that raises underflow
fp_tmp2 = f69
// normalized copy of the input x
SIN_NORM_f8 = f70
// Constant table used by sinf and cosf.
// Layout: 12 data8 words of polynomial coefficients (11 used, 1 pad),
// in exactly the order the code loads them, followed by 65 pairs
// (sin(k), cos(k)) for integer k = -32 .. 32, 16 bytes per pair.
// The code indexes the pair for M at byte offset (M + 32) * 16 from the
// first pair.
#ifdef _LIBC
.rodata
#else
.data
#endif
.align 16
sin_coeff_1_table:
ASM_TYPE_DIRECTIVE(sin_coeff_1_table,@object)
data8 0xBF56C16C16BF6462 // q3
data8 0x3EFA01A0128B9EBC // q4
data8 0xBE927E42FDF33FFE // q5
data8 0x3E21DA5C72A446F3 // q6
data8 0x3EC71DD1D5E421A4 // p4
data8 0xBE5AC5C9D0ACF95A // p5
data8 0xBFC55555555554CA // p1
data8 0x3F811111110F2395 // p2
data8 0xBFE0000000000000 // q1
data8 0x3FA55555555554EF // q2
data8 0xBF2A01A011232913 // p3
data8 0x0000000000000000 // pad
/////////////////////////////////////////
data8 0xBFE1A54991426566 //sin(-32)
data8 0x3FEAB1F5305DE8E5 //cos(-32)
data8 0x3FD9DBC0B640FC81 //sin(-31)
data8 0x3FED4591C3E12A20 //cos(-31)
data8 0x3FEF9DF47F1C903D //sin(-30)
data8 0x3FC3BE82F2505A52 //cos(-30)
data8 0x3FE53C7D20A6C9E7 //sin(-29)
data8 0xBFE7F01658314E47 //cos(-29)
data8 0xBFD156853B4514D6 //sin(-28)
data8 0xBFEECDAAD1582500 //cos(-28)
data8 0xBFEE9AA1B0E5BA30 //sin(-27)
data8 0xBFD2B266F959DED5 //cos(-27)
data8 0xBFE866E0FAC32583 //sin(-26)
data8 0x3FE4B3902691A9ED //cos(-26)
data8 0x3FC0F0E6F31E809D //sin(-25)
data8 0x3FEFB7EEF59504FF //cos(-25)
data8 0x3FECFA7F7919140F //sin(-24)
data8 0x3FDB25BFB50A609A //cos(-24)
data8 0x3FEB143CD0247D02 //sin(-23)
data8 0xBFE10CF7D591F272 //cos(-23)
data8 0x3F8220A29F6EB9F4 //sin(-22)
data8 0xBFEFFFADD8D4ACDA //cos(-22)
data8 0xBFEAC5E20BB0D7ED //sin(-21)
data8 0xBFE186FF83773759 //cos(-21)
data8 0xBFED36D8F55D3CE0 //sin(-20)
data8 0x3FDA1E043964A83F //cos(-20)
data8 0xBFC32F2D28F584CF //sin(-19)
data8 0x3FEFA377DE108258 //cos(-19)
data8 0x3FE8081668131E26 //sin(-18)
data8 0x3FE52150815D2470 //cos(-18)
data8 0x3FEEC3C4AC42882B //sin(-17)
data8 0xBFD19C46B07F58E7 //cos(-17)
data8 0x3FD26D02085F20F8 //sin(-16)
data8 0xBFEEA5257E962F74 //cos(-16)
data8 0xBFE4CF2871CEC2E8 //sin(-15)
data8 0xBFE84F5D069CA4F3 //cos(-15)
data8 0xBFEFB30E327C5E45 //sin(-14)
data8 0x3FC1809AEC2CA0ED //cos(-14)
data8 0xBFDAE4044881C506 //sin(-13)
data8 0x3FED09CDD5260CB7 //cos(-13)
data8 0x3FE12B9AF7D765A5 //sin(-12)
data8 0x3FEB00DA046B65E3 //cos(-12)
data8 0x3FEFFFEB762E93EB //sin(-11)
data8 0x3F7220AE41EE2FDF //cos(-11)
data8 0x3FE1689EF5F34F52 //sin(-10)
data8 0xBFEAD9AC890C6B1F //cos(-10)
data8 0xBFDA6026360C2F91 //sin( -9)
data8 0xBFED27FAA6A6196B //cos( -9)
data8 0xBFEFA8D2A028CF7B //sin( -8)
data8 0xBFC29FBEBF632F94 //cos( -8)
data8 0xBFE50608C26D0A08 //sin( -7)
data8 0x3FE81FF79ED92017 //cos( -7)
data8 0x3FD1E1F18AB0A2C0 //sin( -6)
data8 0x3FEEB9B7097822F5 //cos( -6)
data8 0x3FEEAF81F5E09933 //sin( -5)
data8 0x3FD22785706B4AD9 //cos( -5)
data8 0x3FE837B9DDDC1EAE //sin( -4)
data8 0xBFE4EAA606DB24C1 //cos( -4)
data8 0xBFC210386DB6D55B //sin( -3)
data8 0xBFEFAE04BE85E5D2 //cos( -3)
data8 0xBFED18F6EAD1B446 //sin( -2)
data8 0xBFDAA22657537205 //cos( -2)
data8 0xBFEAED548F090CEE //sin( -1)
data8 0x3FE14A280FB5068C //cos( -1)
data8 0x0000000000000000 //sin( 0)
data8 0x3FF0000000000000 //cos( 0)
data8 0x3FEAED548F090CEE //sin( 1)
data8 0x3FE14A280FB5068C //cos( 1)
data8 0x3FED18F6EAD1B446 //sin( 2)
data8 0xBFDAA22657537205 //cos( 2)
data8 0x3FC210386DB6D55B //sin( 3)
data8 0xBFEFAE04BE85E5D2 //cos( 3)
data8 0xBFE837B9DDDC1EAE //sin( 4)
data8 0xBFE4EAA606DB24C1 //cos( 4)
data8 0xBFEEAF81F5E09933 //sin( 5)
data8 0x3FD22785706B4AD9 //cos( 5)
data8 0xBFD1E1F18AB0A2C0 //sin( 6)
data8 0x3FEEB9B7097822F5 //cos( 6)
data8 0x3FE50608C26D0A08 //sin( 7)
data8 0x3FE81FF79ED92017 //cos( 7)
data8 0x3FEFA8D2A028CF7B //sin( 8)
data8 0xBFC29FBEBF632F94 //cos( 8)
data8 0x3FDA6026360C2F91 //sin( 9)
data8 0xBFED27FAA6A6196B //cos( 9)
data8 0xBFE1689EF5F34F52 //sin( 10)
data8 0xBFEAD9AC890C6B1F //cos( 10)
data8 0xBFEFFFEB762E93EB //sin( 11)
data8 0x3F7220AE41EE2FDF //cos( 11)
data8 0xBFE12B9AF7D765A5 //sin( 12)
data8 0x3FEB00DA046B65E3 //cos( 12)
data8 0x3FDAE4044881C506 //sin( 13)
data8 0x3FED09CDD5260CB7 //cos( 13)
data8 0x3FEFB30E327C5E45 //sin( 14)
data8 0x3FC1809AEC2CA0ED //cos( 14)
data8 0x3FE4CF2871CEC2E8 //sin( 15)
data8 0xBFE84F5D069CA4F3 //cos( 15)
data8 0xBFD26D02085F20F8 //sin( 16)
data8 0xBFEEA5257E962F74 //cos( 16)
data8 0xBFEEC3C4AC42882B //sin( 17)
data8 0xBFD19C46B07F58E7 //cos( 17)
data8 0xBFE8081668131E26 //sin( 18)
data8 0x3FE52150815D2470 //cos( 18)
data8 0x3FC32F2D28F584CF //sin( 19)
data8 0x3FEFA377DE108258 //cos( 19)
data8 0x3FED36D8F55D3CE0 //sin( 20)
data8 0x3FDA1E043964A83F //cos( 20)
data8 0x3FEAC5E20BB0D7ED //sin( 21)
data8 0xBFE186FF83773759 //cos( 21)
data8 0xBF8220A29F6EB9F4 //sin( 22)
data8 0xBFEFFFADD8D4ACDA //cos( 22)
data8 0xBFEB143CD0247D02 //sin( 23)
data8 0xBFE10CF7D591F272 //cos( 23)
data8 0xBFECFA7F7919140F //sin( 24)
data8 0x3FDB25BFB50A609A //cos( 24)
data8 0xBFC0F0E6F31E809D //sin( 25)
data8 0x3FEFB7EEF59504FF //cos( 25)
data8 0x3FE866E0FAC32583 //sin( 26)
data8 0x3FE4B3902691A9ED //cos( 26)
data8 0x3FEE9AA1B0E5BA30 //sin( 27)
data8 0xBFD2B266F959DED5 //cos( 27)
data8 0x3FD156853B4514D6 //sin( 28)
data8 0xBFEECDAAD1582500 //cos( 28)
data8 0xBFE53C7D20A6C9E7 //sin( 29)
data8 0xBFE7F01658314E47 //cos( 29)
data8 0xBFEF9DF47F1C903D //sin( 30)
data8 0x3FC3BE82F2505A52 //cos( 30)
data8 0xBFD9DBC0B640FC81 //sin( 31)
data8 0x3FED4591C3E12A20 //cos( 31)
data8 0x3FE1A54991426566 //sin( 32)
data8 0x3FEAB1F5305DE8E5 //cos( 32)
ASM_SIZE_DIRECTIVE(sin_coeff_1_table)
//////////////////////////////////////////
// Exported entry points; the __sinf/__cosf names exist only in _LIBC builds.
.global sinf
.global cosf
#ifdef _LIBC
.global __sinf
.global __cosf
#endif
.text
.proc cosf
#ifdef _LIBC
.proc __cosf
#endif
// NOTE(review): only cosf gets an .endp and ASM_SIZE_DIRECTIVE below; none
// is emitted for __cosf - confirm this is intended.
.align 32
// float cosf(float x): x is read from f8, the result is written to f8.
// This entry only sets up the cos-specific state and then branches to
// SINCOSF_COMMON, which is shared with sinf.
cosf:
#ifdef _LIBC
__cosf:
#endif
// Convert x to an integer (sin_Mx) and select the cos path:
// r0 != r0 is false, so p6 = 0 and p7 = 1.
{ .mfi
alloc r32 = ar.pfs,1,7,0,0
fcvt.fx.s1 sin_Mx = f8
cmp.ne p6,p7 = r0,r0 // p7 set if cos
}
// Form the address of the linkage-table slot for sin_coeff_1_table and
// record flag = 0 (cos) for the large-argument dispatch.
{ .mfi
addl SIN_AD_PQ_1 = @ltoff(sin_coeff_1_table),gp
fnorm.s0 SIN_NORM_f8 = f8 // Sets denormal or invalid
mov sin_GR_sincos_flag = 0x0
}
;;
// Load the table address; p9 = x is a zero of either sign (cos(0) exit),
// p8 (the sin(0) exit predicate) is forced clear.
{ .mfi
ld8 SIN_AD_PQ_1 = [SIN_AD_PQ_1]
fclass.m.unc p9,p0 = f8, 0x07
cmp.ne p8,p0 = r0,r0
}
{ .mfb
nop.m 999
nop.f 999
br.sptk L(SINCOSF_COMMON)
}
;;
.endp cosf
ASM_SIZE_DIRECTIVE(cosf)
.text
.proc sinf
#ifdef _LIBC
.proc __sinf
#endif
// NOTE(review): only sinf gets an .endp and ASM_SIZE_DIRECTIVE at the end
// of this procedure; none is emitted for __sinf - confirm this is intended.
.align 32
// float sinf(float x): x is read from f8, the result is written to f8.
// The code after SINCOSF_COMMON is shared with cosf.
//
// Method, as implemented below:
//   M = x converted to an integer (fcvt.fx), r = x - M
//   sin(M), cos(M) come from sin_coeff_1_table for -32 <= M <= 32
//   sin(r) and cos(r) are evaluated as polynomials in r
//   sin(x) = sin(M)*cos(r) + cos(M)*sin(r)
//   cos(x) = cos(M)*cos(r) - sin(M)*sin(r)
//   Arguments with M outside -32..32 are passed to the double sin/cos.
sinf:
#ifdef _LIBC
__sinf:
#endif
// Convert x to an integer (sin_Mx) and select the sin path:
// r0 == r0 is true, so p6 = 1 and p7 = 0.
{ .mfi
alloc r32 = ar.pfs,1,7,0,0
fcvt.fx.s1 sin_Mx = f8
cmp.eq p6,p7 = r0,r0 // p6 set if sin
}
// Form the address of the linkage-table slot for sin_coeff_1_table and
// record flag = 1 (sin) for the large-argument dispatch.
{ .mfi
addl SIN_AD_PQ_1 = @ltoff(sin_coeff_1_table),gp
fnorm.s0 SIN_NORM_f8 = f8 // Sets denormal or invalid
mov sin_GR_sincos_flag = 0x1
}
;;
// Load the table address; p8 = x is a zero of either sign (sin(0) exit),
// p9 (the cos(0) exit predicate) is forced clear.
{ .mfi
ld8 SIN_AD_PQ_1 = [SIN_AD_PQ_1]
fclass.m.unc p8,p0 = f8, 0x07
cmp.ne p9,p0 = r0,r0
}
{ .mfb
nop.m 999
nop.f 999
br.sptk L(SINCOSF_COMMON)
}
;;
L(SINCOSF_COMMON):
// Here with p6 if sin, p7 if cos, p8 if sin(0), p9 if cos(0)
// Each coefficient load post-increments the pointer by 16 bytes, walking
// through the coefficient words at the start of the table.
{ .mmf
ldfpd sin_coeff_Q3, sin_coeff_Q4 = [SIN_AD_PQ_1], 16
nop.m 999
fclass.m.unc p11,p0 = f8, 0x23 // Test for x=inf
}
;;
// sin(+-0): return immediately with f8 untouched, i.e. the input zero.
{ .mfb
ldfpd sin_coeff_Q5, sin_coeff_Q6 = [SIN_AD_PQ_1], 16
fclass.m.unc p10,p0 = f8, 0xc3 // Test for x=nan
(p8) br.ret.spnt b0 // Exit for sin(0)
}
// cos(+-0): result is 1*1+0 = 1.0
{ .mfb
nop.m 999
(p9) fma.s f8 = f1,f1,f0
(p9) br.ret.spnt b0 // Exit for cos(0)
}
;;
// gr_tmp = all ones (later moved to fp_tmp for the inexact-raising multiply);
// sin_Mfloat = M converted back to floating point.
{ .mmf
ldfpd sin_coeff_P4, sin_coeff_P5 = [SIN_AD_PQ_1], 16
addl gr_tmp = -1,r0
fcvt.xf sin_Mfloat = sin_Mx
}
;;
// sin_GR_Mint = M as a general-register integer.
{ .mfi
getf.sig sin_GR_Mint = sin_Mx
(p11) frcpa.s0 f8,p13 = f0,f0 // qnan indef if x=inf
nop.i 999
}
{ .mfb
ldfpd sin_coeff_P1, sin_coeff_P2 = [SIN_AD_PQ_1], 16
nop.f 999
(p11) br.ret.spnt b0 // Exit for x=inf
}
;;
// p8 is reused from here on as the out-of-table-range flag:
// p8 = (M <= -33), p9 = otherwise.
{ .mfi
ldfpd sin_coeff_Q1, sin_coeff_Q2 = [SIN_AD_PQ_1], 16
nop.f 999
cmp.ge p8,p9 = -33,sin_GR_Mint
}
// Table index is biased by 32 because the table starts at k = -32.
{ .mfb
add sin_GR_index = 32,sin_GR_Mint
(p10) fma.s f8 = f8,f1,f0 // Force qnan if x=nan
(p10) br.ret.spnt b0 // Exit for x=nan
}
;;
// Last coefficient load; with its post-increment the pointer has advanced
// past all 12 coefficient words and addresses the first sin/cos pair.
// If M was not <= -33, also set p8 when M >= 33.
// Scale the index by 16, the size of one sin/cos pair.
{ .mmi
ldfd sin_coeff_P3 = [SIN_AD_PQ_1], 16
(p9) cmp.le p8,p0 = 33, sin_GR_Mint
shl sin_GR_index = sin_GR_index,4
}
;;
// r = x - M, computed as -(1*M) + x.
// When M is out of table range, pick the double routine from the flag
// (0 = cos, 1 = sin).
{ .mfi
setf.sig fp_tmp = gr_tmp // Create constant such that fmpy sets inexact
fnma.s1 sin_r = f1,sin_Mfloat,SIN_NORM_f8
(p8) cmp.eq.unc p11,p12=sin_GR_sincos_flag,r0 // p11 if must call dbl cos
// p12 if must call dbl sin
}
// Address of the table pair for M; then leave for the double routines if
// required.
{ .mbb
add SIN_AD_PQ_2 = sin_GR_index,SIN_AD_PQ_1
(p11) br.cond.spnt COS_DOUBLE
(p12) br.cond.spnt SIN_DOUBLE
}
;;
.pred.rel "mutex",p6,p7 //SIN_Sin_Flag, SIN_Cos_Flag
// The pair in memory is (sin(M), cos(M)).
// sin path: sin_tbl_S = sin(M), sin_tbl_C = cos(M).
// cos path: the destinations are swapped, so sin_tbl_S = cos(M) and
//           sin_tbl_C = sin(M); the same polynomial code then serves both.
{ .mmi
(p6) ldfpd sin_tbl_S,sin_tbl_C = [SIN_AD_PQ_2]
(p7) ldfpd sin_tbl_C,sin_tbl_S = [SIN_AD_PQ_2]
nop.i 999
}
;;
{ .mfi
nop.m 999
(p6) fclass.m.unc p8,p0 = f8, 0x0b // If sin, note denormal input to set uflow
nop.i 999
}
// t = r^2
{ .mfi
nop.m 999
fma.s1 sin_t = sin_r,sin_r,f0
nop.i 999
}
;;
// rcube = r^3
{ .mfi
nop.m 999
fma.s1 sin_rcube = sin_t,sin_r,f0
nop.i 999
}
// tsq = t^2 = r^4
{ .mfi
nop.m 999
fma.s1 sin_tsq = sin_t,sin_t,f0
nop.i 999
}
;;
// q3 = Q3 + t*Q4
{ .mfi
nop.m 999
fma.s1 sin_poly_q3 = sin_t,sin_coeff_Q4,sin_coeff_Q3
nop.i 999
}
// q5 = Q5 + t*Q6
{ .mfi
nop.m 999
fma.s1 sin_poly_q5 = sin_t,sin_coeff_Q6,sin_coeff_Q5
nop.i 999
}
;;
// p1 = P4 + t*P5
{ .mfi
nop.m 999
fma.s1 sin_poly_p1 = sin_t,sin_coeff_P5,sin_coeff_P4
nop.i 999
}
// p2 = P1 + t*P2
{ .mfi
nop.m 999
fma.s1 sin_poly_p2 = sin_t,sin_coeff_P2,sin_coeff_P1
nop.i 999
}
;;
// q1 = Q1 + t*Q2
{ .mfi
nop.m 999
fma.s1 sin_poly_q1 = sin_t,sin_coeff_Q2,sin_coeff_Q1
nop.i 999
}
// S_t = S*t, where S is sin_tbl_S
{ .mfi
nop.m 999
fma.s1 sin_S_t = sin_t,sin_tbl_S,f0
nop.i 999
}
;;
{ .mfi
nop.m 999
(p8) fmpy.s.s0 fp_tmp2 = f8,f8 // Dummy mult to set underflow if sin(denormal)
nop.i 999
}
// r7 = r^3 * r^4 = r^7
{ .mfi
nop.m 999
fma.s1 sin_r7 = sin_rcube,sin_tsq,f0
nop.i 999
}
;;
// q3456 = q3 + t^2*q5
{ .mfi
nop.m 999
fma.s1 sin_poly_q3456 = sin_tsq,sin_poly_q5,sin_poly_q3
nop.i 999
}
;;
// p3 = P3 + t*p1
{ .mfi
nop.m 999
fma.s1 sin_poly_p3 = sin_t,sin_poly_p1,sin_coeff_P3
nop.i 999
}
// p4 = r + r^3*p2
{ .mfi
nop.m 999
fma.s1 sin_poly_p4 = sin_rcube,sin_poly_p2,sin_r
nop.i 999
}
;;
// S*t^3
{ .mfi
nop.m 999
fma.s1 sin_tbl_S_tcube = sin_S_t,sin_tsq,f0
nop.i 999
}
// q12 = S + S*t*q1
{ .mfi
nop.m 999
fma.s1 sin_poly_q12 = sin_S_t,sin_poly_q1,sin_tbl_S
nop.i 999
}
;;
// sin(r) = p4 + r^7*p3
{ .mfi
nop.m 999
fma.d.s1 sin_of_r = sin_r7,sin_poly_p3,sin_poly_p4
nop.i 999
}
;;
// S*cos(r) = q12 + S*t^3*q3456
{ .mfi
nop.m 999
fma.d.s1 sin_tbl_S_cos_of_r = sin_tbl_S_tcube,sin_poly_q3456,sin_poly_q12
nop.i 999
}
{ .mfi
nop.m 999
fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
nop.i 999
}
;;
.pred.rel "mutex",p6,p7 //SIN_Sin_Flag, SIN_Cos_Flag
// sin: f8 = cos(M)*sin(r) + sin(M)*cos(r), rounded to single
{ .mfi
nop.m 999
//(SIN_Sin_Flag) fma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
(p6) fma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
nop.i 999
}
// cos: f8 = -(sin(M)*sin(r)) + cos(M)*cos(r), rounded to single
{ .mfb
nop.m 999
//(SIN_Cos_Flag) fnma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
(p7) fnma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
br.ret.sptk b0
}
.endp sinf
ASM_SIZE_DIRECTIVE(sinf)
// SIN_DOUBLE: large-argument path for sinf, reached by a branch from the
// common code. Calls the double routine sin on the value still in f8 and
// rounds its result to single precision.
.proc SIN_DOUBLE
SIN_DOUBLE:
.prologue
// Save ar.pfs, gp and b0 in local registers across the call; the .save
// directives record the save locations as unwind information.
{ .mfi
nop.m 0
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs
}
;;
{ .mfi
mov GR_SAVE_GP=gp
nop.f 0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0
}
.body
{ .mmb
nop.m 999
nop.m 999
br.call.sptk.many b0=sin
}
;;
// Restore gp and the return address saved above.
{ .mfi
mov gp = GR_SAVE_GP
nop.f 999
mov b0 = GR_SAVE_B0
}
;;
// Round the double result to single (f8*1+0 with the .s completer) and
// restore ar.pfs before returning to the caller of sinf.
{ .mfi
nop.m 999
fma.s f8 = f8,f1,f0
(p0) mov ar.pfs = GR_SAVE_PFS
}
{ .mib
nop.m 999
nop.i 999
(p0) br.ret.sptk b0
}
;;
.endp SIN_DOUBLE
ASM_SIZE_DIRECTIVE(SIN_DOUBLE)
// COS_DOUBLE: large-argument path for cosf, reached by a branch from the
// common code. Calls the double routine cos on the value still in f8 and
// rounds its result to single precision.
.proc COS_DOUBLE
COS_DOUBLE:
.prologue
// Save ar.pfs, gp and b0 in local registers across the call; the .save
// directives record the save locations as unwind information.
{ .mfi
nop.m 0
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs
}
;;
{ .mfi
mov GR_SAVE_GP=gp
nop.f 0
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0
}
.body
{ .mmb
nop.m 999
nop.m 999
br.call.sptk.many b0=cos
}
;;
// Restore gp and the return address saved above.
{ .mfi
mov gp = GR_SAVE_GP
nop.f 999
mov b0 = GR_SAVE_B0
}
;;
// Round the double result to single (f8*1+0 with the .s completer) and
// restore ar.pfs before returning to the caller of cosf.
{ .mfi
nop.m 999
fma.s f8 = f8,f1,f0
(p0) mov ar.pfs = GR_SAVE_PFS
}
{ .mib
nop.m 999
nop.i 999
(p0) br.ret.sptk b0
}
;;
.endp COS_DOUBLE
ASM_SIZE_DIRECTIVE(COS_DOUBLE)
// Declare the double-precision sin and cos routines called by SIN_DOUBLE
// and COS_DOUBLE; they are not defined in this file.
.type sin,@function
.global sin
.type cos,@function
.global cos

2506
sysdeps/ia64/fpu/s_cosl.S Normal file

File diff suppressed because it is too large Load Diff

1755
sysdeps/ia64/fpu/s_expm1.S Normal file

File diff suppressed because it is too large Load Diff

1742
sysdeps/ia64/fpu/s_expm1f.S Normal file

File diff suppressed because it is too large Load Diff

1603
sysdeps/ia64/fpu/s_expm1l.S Normal file

File diff suppressed because it is too large Load Diff

227
sysdeps/ia64/fpu/s_floor.S Normal file
View File

@ -0,0 +1,227 @@
.file "floor.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
.align 32
.global floor#
.section .text
.proc floor#
.align 32
// History
//==============================================================
// 2/02/00: Initial version
// 3/22/00: Updated to improve performance
// 6/13/00: Improved speed, fixed setting of inexact flag
// 6/27/00: Eliminated incorrect invalid flag setting
// 2/07/01: Corrected sign of zero result in round to -inf mode
// API
//==============================================================
// double floor(double x)
// general input registers:
floor_GR_FFFF = r14 // -1 (all ones), copied into the significand of FLOOR_FFFF
floor_GR_signexp = r15 // sign and exponent field of the input (getf.exp)
floor_GR_exponent = r16 // floor_GR_signexp with the sign bit masked off
floor_GR_expmask = r17 // 0x1FFFF, mask that keeps the 17 exponent bits
floor_GR_bigexp = r18 // biased exponent from which the input has no fraction
// predicate registers used:
// p6 ==> Input is NaN, infinity, zero
// p7 ==> Input is denormal
// p8 ==> Input is <0
// p9 ==> Input is not <0 (complement of p8, so also set for NaN)
// p10 ==> Input is already an integer (exponent >= floor_GR_bigexp)
// p11 ==> Input is not a large integer (exponent < floor_GR_bigexp)
// p12 ==> Input is a smaller integer (p11 and trunc(x) == x)
// p13 ==> Input is not an integer (p11 and trunc(x) != x), so inexact must be set
// floating-point registers used:
FLOOR_NORM_f8 = f9 // normalized copy of the input
FLOOR_FFFF = f10 // significand of all ones, multiplied by itself to raise inexact
FLOOR_INEXACT = f11 // destination of the inexact-raising multiply (value unused)
FLOOR_FLOAT_INT_f8 = f12 // trunc(x) converted back to floating point
FLOOR_INT_f8 = f13 // trunc(x) as an integer (fcvt.fx.trunc)
FLOOR_adj = f14 // -1.0 when x<0, otherwise 0.0; added to trunc(x)
// Overview of operation
//==============================================================
// double floor(double x)
// Return an integer value (represented as a double) that is the largest
// value not greater than x
// This is x rounded toward -infinity to an integral value.
// Inexact is set if x != floor(x)
// **************************************************************************
// Set denormal flag for denormal input
// and take denormal fault if necessary.
// Is the input an integer value already?
// double_extended
// if the exponent is >= 1003e => 3F(true) = 63(decimal)
// we have a significand of 64 bits 1.63-bits.
// If we multiply by 2^63, we no longer have a fractional part
// So input is an integer value already.
// double
// if the exponent is >= 10033 => 34(true) = 52(decimal)
// 34 + 3ff = 433
// we have a significand of 53 bits 1.52-bits. (implicit 1)
// If we multiply by 2^52, we no longer have a fractional part
// So input is an integer value already.
// single
// if the exponent is >= 10016 => 17(true) = 23(decimal)
// we have a significand of 24 bits 1.23-bits. (implicit 1)
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
// If x is NAN, ZERO, or INFINITY, then return
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 1 11 0xe7
#include "libm_support.h"
// double floor(double x)
// x is read from f8 and the result is written back to f8.
floor:
#ifdef _LIBC
.global __floor
__floor:
#endif
// Start the main-path work immediately: read the sign/exponent field of x,
// truncate x to an integer, and load the biased exponent threshold
// (0x10033 = 0xffff + 52) from which a double has no fraction bits.
{ .mfi
getf.exp floor_GR_signexp = f8
fcvt.fx.trunc.s1 FLOOR_INT_f8 = f8
addl floor_GR_bigexp = 0x10033, r0
}
// p8 ==> x<0, p9 ==> otherwise; these select the adjustment below.
// Also load -1 (source of the all-ones significand) and the exponent mask.
{ .mfi
addl floor_GR_FFFF = -1,r0
fcmp.lt.s1 p8,p9 = f8,f0
mov floor_GR_expmask = 0x1FFFF ;;
}
// p7 ==> denorm
{ .mfi
setf.sig FLOOR_FFFF = floor_GR_FFFF
fclass.m p7,p0 = f8, 0x0b
nop.i 999
}
// Normalized copy of x: used by the denormal path and returned as the result
// when x is already an integer.
{ .mfi
nop.m 999
fnorm.s1 FLOOR_NORM_f8 = f8
nop.i 999 ;;
}
// p6 ==> NAN, INF, ZERO
// p10 ==> everything else (split into large / not large further down)
// Denormal input redoes the first two operations on the normalized copy.
{ .mfb
nop.m 999
fclass.m p6,p10 = f8, 0xe7
(p7) br.cond.spnt L(FLOOR_DENORM) ;;
}
L(FLOOR_COMMON):
.pred.rel "mutex",p8,p9
// Set adjustment that is added to trunc(x) for the result
// If x<0, adjustment is -1.0
// If x>=0, adjustment is 0.0
{ .mfi
and floor_GR_exponent = floor_GR_signexp, floor_GR_expmask
(p8) fnma.s1 FLOOR_adj = f1,f1,f0
nop.i 999
}
{ .mfi
nop.m 999
(p9) fadd.s1 FLOOR_adj = f0,f0
nop.i 999 ;;
}
// The p12 written here is not used: the predicated .unc compare further down
// rewrites p12 before anything reads it.
{ .mfi
nop.m 999
fcmp.eq.s0 p12,p0 = f8,f0 // Dummy op to set denormal and invalid flag
nop.i 999
}
// Split ordinary inputs by magnitude: p10 ==> exponent >= bigexp (already an
// integer), p11 ==> exponent below the threshold.  Because of .unc both are
// cleared when p10 was false on entry (NaN, infinity, zero).
// NaN, infinity and zero return the input itself.
{ .mfi
(p10) cmp.ge.unc p10,p11 = floor_GR_exponent, floor_GR_bigexp
(p6) fnorm.d f8 = f8
nop.i 999 ;;
}
// Convert trunc(x) back to floating point.
{ .mfi
nop.m 999
(p11) fcvt.xf FLOOR_FLOAT_INT_f8 = FLOOR_INT_f8
nop.i 999 ;;
}
// Large input: the result is the input.
{ .mfi
nop.m 999
(p10) fnorm.d f8 = FLOOR_NORM_f8
nop.i 999 ;;
}
// Result = trunc(x) + adjustment.
{ .mfi
nop.m 999
(p11) fadd.d f8 = FLOOR_FLOAT_INT_f8,FLOOR_adj
nop.i 999 ;;
}
// p12 ==> trunc(x) == x (input is an integer), p13 ==> trunc(x) != x.
// Both are cleared when p11 is false.
{ .mfi
nop.m 999
(p11) fcmp.eq.unc.s1 p12,p13 = FLOOR_FLOAT_INT_f8, FLOOR_NORM_f8
nop.i 999 ;;
}
// Set inexact if result not equal to input
{ .mfi
nop.m 999
(p13) fmpy.s0 FLOOR_INEXACT = FLOOR_FFFF,FLOOR_FFFF
nop.i 999
}
// Set result to input if integer
{ .mfb
nop.m 999
(p12) fnorm.d f8 = FLOOR_NORM_f8
br.ret.sptk b0 ;;
}
// Here if input denorm
// Repeat the exponent read and the truncation on the normalized copy, then
// rejoin the common path.
L(FLOOR_DENORM):
{ .mfb
getf.exp floor_GR_signexp = FLOOR_NORM_f8
fcvt.fx.trunc.s1 FLOOR_INT_f8 = FLOOR_NORM_f8
br.cond.sptk L(FLOOR_COMMON) ;;
}
.endp floor
ASM_SIZE_DIRECTIVE(floor)

224
sysdeps/ia64/fpu/s_floorf.S Normal file
View File

@ -0,0 +1,224 @@
.file "floorf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
.align 32
.global floorf#
.section .text
.proc floorf#
.align 32
// History
//==============================================================
// 2/02/00: Initial version
// 6/13/00: Improved speed
// 6/27/00: Eliminated incorrect invalid flag setting
// 2/07/01: Corrected sign of zero result in round to -inf mode
// API
//==============================================================
// float floorf(float x)
// general input registers:
floor_GR_FFFF = r14 // -1 (all ones), copied into the significand of FLOOR_FFFF
floor_GR_signexp = r15 // sign and exponent field of the input (getf.exp)
floor_GR_exponent = r16 // floor_GR_signexp with the sign bit masked off
floor_GR_expmask = r17 // 0x1FFFF, mask that keeps the 17 exponent bits
floor_GR_bigexp = r18 // biased exponent from which the input has no fraction
// predicate registers used:
// p6 ==> Input is NaN, infinity, zero
// p7 ==> Input is denormal
// p8 ==> Input is <0
// p9 ==> Input is not <0 (complement of p8, so also set for NaN)
// p10 ==> Input is already an integer (exponent >= floor_GR_bigexp)
// p11 ==> Input is not a large integer (exponent < floor_GR_bigexp)
// p12 ==> Input is a smaller integer (p11 and trunc(x) == x)
// p13 ==> Input is not an integer (p11 and trunc(x) != x), so inexact must be set
// floating-point registers used:
FLOOR_NORM_f8 = f9 // normalized copy of the input
FLOOR_FFFF = f10 // significand of all ones, multiplied by itself to raise inexact
FLOOR_INEXACT = f11 // destination of the inexact-raising multiply (value unused)
FLOOR_FLOAT_INT_f8 = f12 // trunc(x) converted back to floating point
FLOOR_INT_f8 = f13 // trunc(x) as an integer (fcvt.fx.trunc)
FLOOR_adj = f14 // -1.0 when x<0, otherwise 0.0; added to trunc(x)
// Overview of operation
//==============================================================
// float floorf(float x)
// Return an integer value (represented as a float) that is the largest
// value not greater than x
// This is x rounded toward -infinity to an integral value.
// Inexact is set if x != floorf(x)
// **************************************************************************
// Set denormal flag for denormal input
// and take denormal fault if necessary.
// Is the input an integer value already?
// double_extended
// if the exponent is >= 1003e => 3F(true) = 63(decimal)
// we have a significand of 64 bits 1.63-bits.
// If we multiply by 2^63, we no longer have a fractional part
// So input is an integer value already.
// double
// if the exponent is >= 10033 => 34(true) = 52(decimal)
// 34 + 3ff = 433
// we have a significand of 53 bits 1.52-bits. (implicit 1)
// If we multiply by 2^52, we no longer have a fractional part
// So input is an integer value already.
// single
// if the exponent is >= 10016 => 17(true) = 23(decimal)
// we have a significand of 24 bits 1.23-bits. (implicit 1)
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
// If x is NAN, ZERO, or INFINITY, then return
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 1 11 0xe7
#include "libm_support.h"
// float floorf(float x)
// x is read from f8 and the result is written back to f8.
floorf:
#ifdef _LIBC
.global __floorf
__floorf:
#endif
// Start the main-path work immediately: read the sign/exponent field of x,
// truncate x to an integer, and load the biased exponent threshold
// (0x10016 = 0xffff + 23) from which a float has no fraction bits.
{ .mfi
getf.exp floor_GR_signexp = f8
fcvt.fx.trunc.s1 FLOOR_INT_f8 = f8
addl floor_GR_bigexp = 0x10016, r0
}
// p8 ==> x<0, p9 ==> otherwise; these select the adjustment below.
// Also load -1 (source of the all-ones significand) and the exponent mask.
{ .mfi
addl floor_GR_FFFF = -1,r0
fcmp.lt.s1 p8,p9 = f8,f0
mov floor_GR_expmask = 0x1FFFF ;;
}
// p7 ==> denorm
{ .mfi
setf.sig FLOOR_FFFF = floor_GR_FFFF
fclass.m p7,p0 = f8, 0x0b
nop.i 999
}
// Normalized copy of x: used by the denormal path and returned as the result
// when x is already an integer.
{ .mfi
nop.m 999
fnorm.s1 FLOOR_NORM_f8 = f8
nop.i 999 ;;
}
// p6 ==> NAN, INF, ZERO
// p10 ==> everything else (split into large / not large further down)
// Denormal input redoes the first two operations on the normalized copy.
{ .mfb
nop.m 999
fclass.m p6,p10 = f8, 0xe7
(p7) br.cond.spnt L(FLOOR_DENORM) ;;
}
L(FLOOR_COMMON):
.pred.rel "mutex",p8,p9
// Set adjustment that is added to trunc(x) for the result
// If x<0, adjustment is -1.0
// If x>=0, adjustment is 0.0
{ .mfi
and floor_GR_exponent = floor_GR_signexp, floor_GR_expmask
(p8) fnma.s1 FLOOR_adj = f1,f1,f0
nop.i 999
}
{ .mfi
nop.m 999
(p9) fadd.s1 FLOOR_adj = f0,f0
nop.i 999 ;;
}
// The p12 written here is not used: the predicated .unc compare further down
// rewrites p12 before anything reads it.
{ .mfi
nop.m 999
fcmp.eq.s0 p12,p0 = f8,f0 // Dummy op to set denormal and invalid flag
nop.i 999
}
// Split ordinary inputs by magnitude: p10 ==> exponent >= bigexp (already an
// integer), p11 ==> exponent below the threshold.  Because of .unc both are
// cleared when p10 was false on entry (NaN, infinity, zero).
// NaN, infinity and zero return the input itself.
{ .mfi
(p10) cmp.ge.unc p10,p11 = floor_GR_exponent, floor_GR_bigexp
(p6) fnorm.s f8 = f8
nop.i 999 ;;
}
// Convert trunc(x) back to floating point.
{ .mfi
nop.m 999
(p11) fcvt.xf FLOOR_FLOAT_INT_f8 = FLOOR_INT_f8
nop.i 999 ;;
}
// Large input: the result is the input.
{ .mfi
nop.m 999
(p10) fnorm.s f8 = FLOOR_NORM_f8
nop.i 999 ;;
}
// Result = trunc(x) + adjustment.
{ .mfi
nop.m 999
(p11) fadd.s f8 = FLOOR_FLOAT_INT_f8,FLOOR_adj
nop.i 999 ;;
}
// p12 ==> trunc(x) == x (input is an integer), p13 ==> trunc(x) != x.
// Both are cleared when p11 is false.
{ .mfi
nop.m 999
(p11) fcmp.eq.unc.s1 p12,p13 = FLOOR_FLOAT_INT_f8, FLOOR_NORM_f8
nop.i 999 ;;
}
// Set inexact if result not equal to input
{ .mfi
nop.m 999
(p13) fmpy.s0 FLOOR_INEXACT = FLOOR_FFFF,FLOOR_FFFF
nop.i 999
}
// Set result to input if integer
{ .mfb
nop.m 999
(p12) fnorm.s f8 = FLOOR_NORM_f8
br.ret.sptk b0 ;;
}
// Here if input denorm
// Repeat the exponent read and the truncation on the normalized copy, then
// rejoin the common path.
L(FLOOR_DENORM):
{ .mfb
getf.exp floor_GR_signexp = FLOOR_NORM_f8
fcvt.fx.trunc.s1 FLOOR_INT_f8 = FLOOR_NORM_f8
br.cond.sptk L(FLOOR_COMMON) ;;
}
.endp floorf
ASM_SIZE_DIRECTIVE(floorf)

224
sysdeps/ia64/fpu/s_floorl.S Normal file
View File

@ -0,0 +1,224 @@
.file "floorl.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
.align 32
.global floorl#
.section .text
.proc floorl#
.align 32
// History
//==============================================================
// 2/02/00: Initial version
// 6/13/00: Improved speed
// 6/27/00: Eliminated incorrect invalid flag setting
// 2/07/01: Corrected sign of zero result in round to -inf mode
// API
//==============================================================
// long double floorl(long double x)
// general input registers:
floor_GR_FFFF = r14 // -1 (all ones), copied into the significand of FLOOR_FFFF
floor_GR_signexp = r15 // sign and exponent field of the input (getf.exp)
floor_GR_exponent = r16 // floor_GR_signexp with the sign bit masked off
floor_GR_expmask = r17 // 0x1FFFF, mask that keeps the 17 exponent bits
floor_GR_bigexp = r18 // biased exponent from which the input has no fraction
// predicate registers used:
// p6 ==> Input is NaN, infinity, zero
// p7 ==> Input is denormal
// p8 ==> Input is <0
// p9 ==> Input is not <0 (complement of p8, so also set for NaN)
// p10 ==> Input is already an integer (exponent >= floor_GR_bigexp)
// p11 ==> Input is not a large integer (exponent < floor_GR_bigexp)
// p12 ==> Input is a smaller integer (p11 and trunc(x) == x)
// p13 ==> Input is not an integer (p11 and trunc(x) != x), so inexact must be set
// floating-point registers used:
FLOOR_NORM_f8 = f9 // normalized copy of the input
FLOOR_FFFF = f10 // significand of all ones, multiplied by itself to raise inexact
FLOOR_INEXACT = f11 // destination of the inexact-raising multiply (value unused)
FLOOR_FLOAT_INT_f8 = f12 // trunc(x) converted back to floating point
FLOOR_INT_f8 = f13 // trunc(x) as an integer (fcvt.fx.trunc)
FLOOR_adj = f14 // -1.0 when x<0, otherwise 0.0; added to trunc(x)
// Overview of operation
//==============================================================
// long double floorl(long double x)
// Return an integer value (represented as a long double) that is the largest
// value not greater than x
// This is x rounded toward -infinity to an integral value.
// Inexact is set if x != floorl(x)
// **************************************************************************
// Set denormal flag for denormal input
// and take denormal fault if necessary.
// Is the input an integer value already?
// double_extended
// if the exponent is >= 1003e => 3F(true) = 63(decimal)
// we have a significand of 64 bits 1.63-bits.
// If we multiply by 2^63, we no longer have a fractional part
// So input is an integer value already.
// double
// if the exponent is >= 10033 => 34(true) = 52(decimal)
// 34 + 3ff = 433
// we have a significand of 53 bits 1.52-bits. (implicit 1)
// If we multiply by 2^52, we no longer have a fractional part
// So input is an integer value already.
// single
// if the exponent is >= 10016 => 17(true) = 23(decimal)
// we have a significand of 24 bits 1.23-bits. (implicit 1)
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
// If x is NAN, ZERO, or INFINITY, then return
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 1 11 0xe7
#include "libm_support.h"
// long double floorl(long double x)
// x is read from f8 and the result is written back to f8.
floorl:
#ifdef _LIBC
.global __floorl
__floorl:
#endif
// Start the main-path work immediately: read the sign/exponent field of x,
// truncate x to an integer, and load the biased exponent threshold
// (0x1003e = 0xffff + 63) from which a double-extended value has no
// fraction bits.
{ .mfi
getf.exp floor_GR_signexp = f8
fcvt.fx.trunc.s1 FLOOR_INT_f8 = f8
addl floor_GR_bigexp = 0x1003e, r0
}
// p8 ==> x<0, p9 ==> otherwise; these select the adjustment below.
// Also load -1 (source of the all-ones significand) and the exponent mask.
{ .mfi
addl floor_GR_FFFF = -1,r0
fcmp.lt.s1 p8,p9 = f8,f0
mov floor_GR_expmask = 0x1FFFF ;;
}
// p7 ==> denorm
{ .mfi
setf.sig FLOOR_FFFF = floor_GR_FFFF
fclass.m p7,p0 = f8, 0x0b
nop.i 999
}
// Normalized copy of x: used by the denormal path and returned as the result
// when x is already an integer.
{ .mfi
nop.m 999
fnorm.s1 FLOOR_NORM_f8 = f8
nop.i 999 ;;
}
// p6 ==> NAN, INF, ZERO
// p10 ==> everything else (split into large / not large further down)
// Denormal input redoes the first two operations on the normalized copy.
{ .mfb
nop.m 999
fclass.m p6,p10 = f8, 0xe7
(p7) br.cond.spnt L(FLOOR_DENORM) ;;
}
L(FLOOR_COMMON):
.pred.rel "mutex",p8,p9
// Set adjustment that is added to trunc(x) for the result
// If x<0, adjustment is -1.0
// If x>=0, adjustment is 0.0
{ .mfi
and floor_GR_exponent = floor_GR_signexp, floor_GR_expmask
(p8) fnma.s1 FLOOR_adj = f1,f1,f0
nop.i 999
}
{ .mfi
nop.m 999
(p9) fadd.s1 FLOOR_adj = f0,f0
nop.i 999 ;;
}
// The p12 written here is not used: the predicated .unc compare further down
// rewrites p12 before anything reads it.
{ .mfi
nop.m 999
fcmp.eq.s0 p12,p0 = f8,f0 // Dummy op to set denormal and invalid flag
nop.i 999
}
// Split ordinary inputs by magnitude: p10 ==> exponent >= bigexp (already an
// integer), p11 ==> exponent below the threshold.  Because of .unc both are
// cleared when p10 was false on entry (NaN, infinity, zero).
// NaN, infinity and zero return the input itself.
{ .mfi
(p10) cmp.ge.unc p10,p11 = floor_GR_exponent, floor_GR_bigexp
(p6) fnorm f8 = f8
nop.i 999 ;;
}
// Convert trunc(x) back to floating point.
{ .mfi
nop.m 999
(p11) fcvt.xf FLOOR_FLOAT_INT_f8 = FLOOR_INT_f8
nop.i 999 ;;
}
// Large input: the result is the input.
{ .mfi
nop.m 999
(p10) fnorm f8 = FLOOR_NORM_f8
nop.i 999 ;;
}
// Result = trunc(x) + adjustment.
{ .mfi
nop.m 999
(p11) fadd f8 = FLOOR_FLOAT_INT_f8,FLOOR_adj
nop.i 999 ;;
}
// p12 ==> trunc(x) == x (input is an integer), p13 ==> trunc(x) != x.
// Both are cleared when p11 is false.
{ .mfi
nop.m 999
(p11) fcmp.eq.unc.s1 p12,p13 = FLOOR_FLOAT_INT_f8, FLOOR_NORM_f8
nop.i 999 ;;
}
// Set inexact if result not equal to input
{ .mfi
nop.m 999
(p13) fmpy.s0 FLOOR_INEXACT = FLOOR_FFFF,FLOOR_FFFF
nop.i 999
}
// Set result to input if integer
{ .mfb
nop.m 999
(p12) fnorm f8 = FLOOR_NORM_f8
br.ret.sptk b0 ;;
}
// Here if input denorm
// Repeat the exponent read and the truncation on the normalized copy, then
// rejoin the common path.
L(FLOOR_DENORM):
{ .mfb
getf.exp floor_GR_signexp = FLOOR_NORM_f8
fcvt.fx.trunc.s1 FLOOR_INT_f8 = FLOOR_NORM_f8
br.cond.sptk L(FLOOR_COMMON) ;;
}
.endp floorl
ASM_SIZE_DIRECTIVE(floorl)

View File

@ -0,0 +1,44 @@
//
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
//
#include "libm_support.h"
// frexp: forwards both arguments unchanged to __libm_frexp_8 when
// SIZE_INT_64 is defined, otherwise to __libm_frexp_4 when SIZE_INT_32 is
// defined, and returns whatever that helper returns.
// NOTE(review): the two macros presumably describe the width of int and are
// expected to come from libm_support.h -- confirm; with neither defined the
// body is empty.
double frexp(double x, int *y)
{
#if defined(SIZE_INT_64)
    return __libm_frexp_8(x, y);
#elif defined(SIZE_INT_32)
    return __libm_frexp_4(x, y);
#endif
}

View File

@ -0,0 +1,44 @@
//
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
//
#include "libm_support.h"
// frexpf: forwards both arguments unchanged to __libm_frexp_8f when
// SIZE_INT_64 is defined, otherwise to __libm_frexp_4f when SIZE_INT_32 is
// defined, and returns whatever that helper returns.
// NOTE(review): the two macros presumably describe the width of int and are
// expected to come from libm_support.h -- confirm; with neither defined the
// body is empty.
float frexpf(float x, int *y)
{
#if defined(SIZE_INT_64)
    return __libm_frexp_8f(x, y);
#elif defined(SIZE_INT_32)
    return __libm_frexp_4f(x, y);
#endif
}

View File

@ -0,0 +1,44 @@
//
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
//
#include "libm_support.h"
// frexpl: forwards both arguments unchanged to __libm_frexp_8l when
// SIZE_INT_64 is defined, otherwise to __libm_frexp_4l when SIZE_INT_32 is
// defined, and returns whatever that helper returns.
// NOTE(review): the two macros presumably describe the width of int and are
// expected to come from libm_support.h -- confirm; with neither defined the
// body is empty.
long double frexpl(long double x, int *y)
{
#if defined(SIZE_INT_64)
    return __libm_frexp_8l(x, y);
#elif defined(SIZE_INT_32)
    return __libm_frexp_4l(x, y);
#endif
}

240
sysdeps/ia64/fpu/s_ilogb.S Normal file
View File

@ -0,0 +1,240 @@
.file "ilogb.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/03/00 Initial version
// 5/26/00 Fix bug when x a double-extended denormal;
// if x=0 call error routine, per C9X
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 1/20/01 Fixed result for x=0, corrected error tag value.
.align 32
.global ilogb#
.section .text
.proc ilogb#
.align 32
// API
//==============================================================
// int = ilogb(double)
// Overview of operation
//==============================================================
// ilogb computes log2(x) as an int
// and returns it in r8
// ilogb is similar to logb but differs in the following ways:
// +-inf
// ilogb: returns INT_MAX
// logb: returns +inf
// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
// ilogb: returns INT_MAX (7fffffff)
// logb: returns QNAN (quieted SNAN)
// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
// ilogb: returns INT_MIN (80000000)
// logb: returns -inf
// Registers used
//==============================================================
// general local registers:
// ar.pfs r32
// r33 -> r37
// r38 -> r41 used as parameters to error path
// predicate registers used:
// p6 - x 0
// p7 - x nan, inf
// p8 - x norm, unorm
// p9 - x unorm
// floating-point registers used:
// f8 - f10
#include "libm_support.h"
// Symbolic register names used by ilogb and by the error path that follows
// it.  r34 and r35 hold the exponent bias and exponent mask on the normal
// path and are reused as save registers on the error path.
GR_SAVE_PFS = r32
GR_SAVE_B0 = r34
GR_SAVE_GP = r35
// Output registers carrying the arguments of __libm_error_support.
GR_Parameter_X = r38
GR_Parameter_Y = r39
GR_Parameter_RESULT = r40
GR_Parameter_TAG = r41
// FR_X is the input.  FR_Y and FR_RESULT are both f0, so the constant f0 is
// what gets stored in the second-operand and result slots.
FR_X = f8
FR_Y = f0
FR_RESULT = f0
// int ilogb(double x): x is read from f8, the result is written to r8.
ilogb:
// Form signexp of 2^64 in case need to scale denormal
// (0x1003f = 0xffff + 64).  f9 is the normalized copy of x.
{ .mmf
alloc r32=ar.pfs,1,5,4,0
(p0) mov r37 = 0x1003f
(p0) fnorm f9 = f8 ;;
}
// Form 2^64 in case need to scale denormal
// p7 ==> x is NaN or infinity (class mask 0xe3), p8 ==> everything else.
// r34 = 0xffff is the exponent bias subtracted at the end.
{ .mfi
(p0) setf.exp f10 = r37
(p0) fclass.m.unc p7, p8 = f8, 0xe3
(p0) mov r34 = 0xffff ;;
}
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// X ZERO, returns INT_MIN
// X INF or NAN, returns INT_MAX
// r35 = mask that keeps the 17 exponent bits of the getf.exp value.
// p6 ==> x is zero (class mask 0x07); p8 now means x is neither NaN,
// infinity nor zero.  Both are cleared by .unc when p8 was already false.
{ .mfi
(p0) mov r35 = 0x1ffff
(p8) fclass.m.unc p6, p8 = f8, 0x07
nop.i 999 ;;
}
// NaN or infinity: load INT_MAX as the result.  p6, p8 and p9 are all clear
// in this case, so execution falls through to the br.ret in ILOGB_COMMON
// without touching r8 again.
{ .mlx
nop.m 999
(p7) movl r8 = 0x000000007fffffff ;;
}
// Zero: go to the error path.
{ .mib
nop.m 999
nop.i 999
(p6) br.cond.spnt L(ILOGB_ZERO) ;;
}
// Test for denormal
// p9 ==> the normalized copy f9 is still in the unnormal class.
{ .mfi
nop.m 999
(p8) fclass.m.unc p9, p0 = f9, 0x0b
nop.i 999 ;;
}
L(ILOGB_COMMON):
// X NORMAL returns true exponent
{ .mmi
nop.m 999
(p8) getf.exp r33 = f9
nop.i 999 ;;
}
// If denormal add 64 to exponent bias for scaling
{ .mfb
(p9) add r34 = 64, r34
nop.f 999
(p9) br.cond.spnt L(ILOGB_DENORM) ;;
}
// Drop the sign bit, leaving the biased exponent.
{ .mmi
(p8) and r36 = r35, r33
nop.m 999
nop.i 999 ;;
}
// Result = biased exponent - bias (bias already includes +64 if x was scaled).
{ .mib
(p8) sub r8 = r36, r34
nop.i 999
(p0) br.ret.sptk b0 ;;
}
L(ILOGB_DENORM):
// Here if x denormal
// Form x * 2^64 which is normal
// Return to common code
// The compare of r0 with itself sets p8 and clears p9, so the second pass
// through ILOGB_COMMON computes the result and does not branch here again.
{ .mfb
cmp.eq p8,p9 = r0,r0
fmpy f9 = f9, f10
br.cond.sptk L(ILOGB_COMMON) ;;
}
// X ZERO
// return INT_MIN, call error support
// Sets the error tag and keeps the return value in r33; there is no branch,
// so execution continues into __libm_error_region, which follows directly
// and copies r33 to r8 before returning.
L(ILOGB_ZERO):
{.mlx
mov GR_Parameter_TAG = 157
(p6) movl r33 = 0x0000000080000000 ;;
};;
.endp ilogb
ASM_SIZE_DIRECTIVE(ilogb)
// __libm_error_region: error path entered by fall-through from ILOGB_ZERO.
// Expects GR_Parameter_TAG to be set and the value to return in r33.
// Creates a 64-byte frame, stores FR_X, FR_Y and FR_RESULT in it as doubles,
// calls __libm_error_support with the addresses of those slots in
// GR_Parameter_X/Y/RESULT, then returns r33 in r8.
// Frame layout relative to the new sp: +16 = X, +32 = Y, +48 = RESULT.
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32 = new sp + 32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; pointer advances to new sp + 48
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the Parameter 2 address
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Re-form the Parameter 3 address after the call (the 8/15/00 History entry
// notes that GR_Parameter_RESULT was previously left overwritten).
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
mov r8 = r33 // Store result
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

240
sysdeps/ia64/fpu/s_ilogbf.S Normal file
View File

@ -0,0 +1,240 @@
.file "ilogbf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/03/00 Initial version
// 5/26/00 Fix bug when x a double-extended denormal;
// if x=0 call error routine, per C9X
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 1/20/01 Fixed result for x=0
.align 32
.global ilogbf#
.section .text
.proc ilogbf#
.align 32
// API
//==============================================================
// int = ilogbf(float)
// Overview of operation
//==============================================================
// ilogbf computes log2(x) as an int
// and returns it in r8
// ilogbf is similar to logbf but differs in the following ways:
// +-inf
// ilogbf: returns INT_MAX
// logbf: returns +inf
// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
// ilogbf: returns INT_MAX (7fffffff)
// logbf: returns QNAN (quieted SNAN)
// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
// ilogbf: returns INT_MIN (80000000)
// logbf: returns -inf
// Registers used
//==============================================================
// general local registers:
// ar.pfs r32
// r33 -> r37
// r38 -> r41 used as parameters to error path
// predicate registers used:
// p6 - x 0
// p7 - x nan, inf
// p8 - x norm, unorm
// p9 - x unorm
// floating-point registers used:
// f8 - f10
#include "libm_support.h"
// Symbolic register names used by ilogbf and by the error path that follows
// it.  r34 and r35 hold the exponent bias and exponent mask on the normal
// path and are reused as save registers on the error path.
GR_SAVE_PFS = r32
GR_SAVE_B0 = r34
GR_SAVE_GP = r35
// Output registers carrying the arguments of __libm_error_support.
GR_Parameter_X = r38
GR_Parameter_Y = r39
GR_Parameter_RESULT = r40
GR_Parameter_TAG = r41
// FR_X is the input.  FR_Y and FR_RESULT are both f0, so the constant f0 is
// what gets stored in the second-operand and result slots.
FR_X = f8
FR_Y = f0
FR_RESULT = f0
// int ilogbf(float x): x is read from f8, the result is written to r8.
ilogbf:
// Form signexp of 2^64 in case need to scale denormal
// (0x1003f = 0xffff + 64).  f9 is the normalized copy of x.
{ .mmf
alloc r32=ar.pfs,1,5,4,0
(p0) mov r37 = 0x1003f
(p0) fnorm f9 = f8 ;;
}
// Form 2^64 in case need to scale denormal
// p7 ==> x is NaN or infinity (class mask 0xe3), p8 ==> everything else.
// r34 = 0xffff is the exponent bias subtracted at the end.
{ .mfi
(p0) setf.exp f10 = r37
(p0) fclass.m.unc p7, p8 = f8, 0xe3
(p0) mov r34 = 0xffff ;;
}
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// X ZERO, returns INT_MIN
// X INF or NAN, returns INT_MAX
// r35 = mask that keeps the 17 exponent bits of the getf.exp value.
// p6 ==> x is zero (class mask 0x07); p8 now means x is neither NaN,
// infinity nor zero.  Both are cleared by .unc when p8 was already false.
{ .mfi
(p0) mov r35 = 0x1ffff
(p8) fclass.m.unc p6, p8 = f8, 0x07
nop.i 999 ;;
}
// NaN or infinity: load INT_MAX as the result.  p6, p8 and p9 are all clear
// in this case, so execution falls through to the br.ret in ILOGB_COMMON
// without touching r8 again.
{ .mlx
nop.m 999
(p7) movl r8 = 0x000000007fffffff ;;
}
// Zero: go to the error path.
{ .mib
nop.m 999
nop.i 999
(p6) br.cond.spnt L(ILOGB_ZERO) ;;
}
// Test for denormal
// p9 ==> the normalized copy f9 is still in the unnormal class.
{ .mfi
nop.m 999
(p8) fclass.m.unc p9, p0 = f9, 0x0b
nop.i 999 ;;
}
L(ILOGB_COMMON):
// X NORMAL returns true exponent
{ .mmi
nop.m 999
(p8) getf.exp r33 = f9
nop.i 999 ;;
}
// If denormal add 64 to exponent bias for scaling
{ .mfb
(p9) add r34 = 64, r34
nop.f 999
(p9) br.cond.spnt L(ILOGB_DENORM) ;;
}
// Drop the sign bit, leaving the biased exponent.
{ .mmi
(p8) and r36 = r35, r33
nop.m 999
nop.i 999 ;;
}
// Result = biased exponent - bias (bias already includes +64 if x was scaled).
{ .mib
(p8) sub r8 = r36, r34
nop.i 999
(p0) br.ret.sptk b0 ;;
}
L(ILOGB_DENORM):
// Here if x denormal
// Form x * 2^64 which is normal
// Return to common code
// The compare of r0 with itself sets p8 and clears p9, so the second pass
// through ILOGB_COMMON computes the result and does not branch here again.
{ .mfb
cmp.eq p8,p9 = r0,r0
fmpy f9 = f9, f10
br.cond.sptk L(ILOGB_COMMON) ;;
}
// X ZERO
// return INT_MIN, call error support
// Sets the error tag and keeps the return value in r33; there is no branch,
// so execution continues into __libm_error_region, which follows directly
// and copies r33 to r8 before returning.
L(ILOGB_ZERO):
{.mlx
mov GR_Parameter_TAG = 158
(p6) movl r33 = 0x0000000080000000 ;;
};;
.endp ilogbf
ASM_SIZE_DIRECTIVE(ilogbf)
// __libm_error_region: error path entered by fall-through from ILOGB_ZERO.
// Expects GR_Parameter_TAG to be set and the value to return in r33.
// Creates a 64-byte frame, stores FR_X, FR_Y and FR_RESULT in it as singles,
// calls __libm_error_support with the addresses of those slots in
// GR_Parameter_X/Y/RESULT, then returns r33 in r8.
// Frame layout relative to the new sp: +16 = X, +32 = Y, +48 = RESULT.
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32 = new sp + 32)
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
stfs [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; pointer advances to new sp + 48
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the Parameter 2 address
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Re-form the Parameter 3 address after the call (the 8/15/00 History entry
// notes that GR_Parameter_RESULT was previously left overwritten).
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
mov r8 = r33 // Store result
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#

240
sysdeps/ia64/fpu/s_ilogbl.S Normal file
View File

@ -0,0 +1,240 @@
.file "ilogbl.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/03/00 Initial version
// 5/26/00 Fix bug when x a double-extended denormal;
// if x=0 call error routine, per C9X
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 1/20/01 Fixed result for x=0
// Export ilogbl and open its procedure record in the .text section.
// Both the section position and the entry point are aligned to 32 bytes.
.align 32
.global ilogbl#
.section .text
.proc ilogbl#
.align 32
// API
//==============================================================
// int = ilogbl(double_extended)
// Overview of operation
//==============================================================
// ilogbl computes log2(x) as an int
// and returns it in r8
// ilogbl is similar to logbl but differs in the following ways:
// +-inf
// ilogbl: returns INT_MAX
// logbl: returns +inf
// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
// ilogbl: returns INT_MAX (7fffffff)
// logbl: returns QNAN (quieted SNAN)
// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
// ilogbl: returns INT_MIN (80000000)
// logbl: returns -inf
// Registers used
//==============================================================
// general local registers:
// ar.pfs r32
// r33 -> r37
// r38 -> r41 used as parameters to error path
// predicate registers used:
// p6 - x nan, inf
// p7 - x 0
// p8 - x norm, unorm
// p9 - x unorm
// floating-point registers used:
// f8 - f10
#include "libm_support.h"
// Symbolic register names used by the error path (__libm_error_region)
// that follows ilogbl.  With "alloc r32=ar.pfs,1,5,4,0" the frame is:
//   r32        input
//   r33 - r37  locals
//   r38 - r41  outputs (arguments of __libm_error_support)
// r32 receives ar.pfs from the alloc in ilogbl and again in the error path.
GR_SAVE_PFS = r32
// r34 and r35 hold the exponent bias and exponent mask on the main path of
// ilogbl; the error path reuses them to save b0 and gp.
GR_SAVE_B0 = r34
GR_SAVE_GP = r35
// Output registers: addresses of the stacked x, y and result slots, and
// the error tag.
GR_Parameter_X = r38
GR_Parameter_Y = r39
GR_Parameter_RESULT = r40
GR_Parameter_TAG = r41
// f8 is the function argument.  The "y" and "result" slots are filled from
// f0, i.e. with zero.
FR_X = f8
FR_Y = f0
FR_RESULT = f0
// ilogbl: return the unbiased binary exponent of the value in f8 as an
// integer in r8.
//   In:   f8 = x
//   Out:  r8 = result
//   NaN / Inf     -> r8 = 0x7fffffff, ordinary return
//   zero          -> r33 = 0x80000000, GR_Parameter_TAG = 156, then control
//                    runs off the end of this procedure into the
//                    __libm_error_region code placed directly after it
//   anything else -> r8 = (signexp(f9) & 0x1ffff) - bias, where f9 is the
//                    normalized input and bias is 0xffff (0xffff + 64 when
//                    the input had to be scaled by 2^64 first)
// Predicates: p7 = NaN/Inf, p6 = zero, p8 = finite non-zero,
//             p9 = still unnormal after fnorm (needs scaling).
ilogbl:
// Form signexp of 2^64 in case need to scale denormal
{ .mmf
alloc r32=ar.pfs,1,5,4,0
(p0) mov r37 = 0x1003f
(p0) fnorm f9 = f8 ;;
}
// Form 2^64 in case need to scale denormal
// p7 = x is NaN or Inf, p8 = everything else; r34 = exponent bias
{ .mfi
(p0) setf.exp f10 = r37
(p0) fclass.m.unc p7, p8 = f8, 0xe3
(p0) mov r34 = 0xffff ;;
}
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// X ZERO, returns INT_MIN
// X INF or NAN, returns INT_MAX
// Split the non-NaN/Inf case: p6 = zero, p8 = non-zero.  Because of .unc,
// both p6 and p8 end up clear when p8 was clear on entry (NaN/Inf).
// r35 = mask that keeps the 17 exponent bits and drops the sign bit.
{ .mfi
(p0) mov r35 = 0x1ffff
(p8) fclass.m.unc p6, p8 = f8, 0x07
nop.i 999 ;;
}
// NaN/Inf: preload the result.  With p6, p8 and p9 all clear, the code
// below executes nothing but the final br.ret.
{ .mlx
nop.m 999
(p7) movl r8 = 0x000000007fffffff ;;
}
{ .mib
nop.m 999
nop.i 999
(p6) br.cond.spnt L(ILOGB_ZERO) ;;
}
// Test for denormal
// (tested on the normalized copy f9, so p9 is set only if fnorm could not
// produce a normal number)
{ .mfi
nop.m 999
(p8) fclass.m.unc p9, p0 = f9, 0x0b
nop.i 999 ;;
}
L(ILOGB_COMMON):
// X NORMAL returns true exponent
{ .mmi
nop.m 999
(p8) getf.exp r33 = f9
nop.i 999 ;;
}
// If denormal add 64 to exponent bias for scaling
{ .mfb
(p9) add r34 = 64, r34
nop.f 999
(p9) br.cond.spnt L(ILOGB_DENORM) ;;
}
// r36 = biased exponent with the sign bit removed
{ .mmi
(p8) and r36 = r35, r33
nop.m 999
nop.i 999 ;;
}
// r8 = true exponent; the return is unconditional, so the NaN/Inf case
// leaves here as well.
{ .mib
(p8) sub r8 = r36, r34
nop.i 999
(p0) br.ret.sptk b0 ;;
}
L(ILOGB_DENORM):
// Here if x denormal
// Form x * 2^64 which is normal
// Return to common code
// cmp.eq r0,r0 sets p8 and clears p9, so the second pass through
// ILOGB_COMMON takes the normal path.
{ .mfb
cmp.eq p8,p9 = r0,r0
fmpy f9 = f9, f10
br.cond.sptk L(ILOGB_COMMON) ;;
}
// X ZERO
// return INT_MIN, call error support
// No branch follows: execution continues into __libm_error_region, which
// copies r33 to r8 before returning.
L(ILOGB_ZERO):
{.mlx
mov GR_Parameter_TAG = 156
(p6) movl r33 = 0x0000000080000000 ;;
};;
.endp ilogbl
ASM_SIZE_DIRECTIVE(ilogbl)
// __libm_error_region (ilogbl): error path, entered by falling through
// from L(ILOGB_ZERO) with GR_Parameter_TAG and r33 already set.
// It allocates a 64-byte frame and stores, relative to the new sp:
//   sp+16  FR_X       (address passed in GR_Parameter_X)
//   sp+32  FR_Y       (address passed in GR_Parameter_Y)
//   sp+48  FR_RESULT  (address passed in GR_Parameter_RESULT)
// It then calls __libm_error_support, restores sp, b0, gp and ar.pfs, and
// returns with r8 = r33.  The stacked result slot is not read back.
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// The post-increment leaves GR_Parameter_Y pointing at sp+48.
{ .mmi
stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
// GR_Parameter_RESULT takes the sp+48 address (the "Parameter 3 address"
// remark below belongs to this add, not to the nop).
{ .mib
stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
// After the store, GR_Parameter_Y is moved back to sp+32 for the call.
{ .mib
stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Recompute the result address after the call (see the 8/15/00 history
// entry: the register may have been overwritten by the callee).
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
mov r8 = r33 // Store result
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// The error handler is an external function symbol.
.type __libm_error_support#,@function
.global __libm_error_support#

367
sysdeps/ia64/fpu/s_ldexp.S Normal file
View File

@ -0,0 +1,367 @@
.file "ldexp.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00 Initial version
// 1/26/01 ldexp completely reworked and now standalone version
//
// API
//==============================================================
// double = ldexp (double x, int n)
// input floating point f8 and int n (r33)
// output floating point f8
//
// Returns x* 2**n using an fma and detects overflow
// and underflow.
//
//
#include "libm_support.h"
// Register aliases for ldexp and its error path.
//
// Floating-point registers.
// FR_Big / FR_NBig are used twice: first as the clamp factors 2**40000 and
// 2**-40000, then as the overflow thresholds +2**1024 and -2**1024.
FR_Big = f6
FR_NBig = f7
// Argument and result share f8.
FR_Floating_X = f8
FR_Result = f8
// The same product computed under status fields s2 and s3.
FR_Result2 = f9
FR_Result3 = f11
// Normalized copy of x; this is what the error path stores as "x".
FR_Norm_X = f12
FR_Two_N = f14
// Not referenced by ldexp or its error path below.
FR_Two_to_Big = f15
// Scratch general registers.
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
GR_Scratch = r18
GR_Scratch1 = r19
GR_Bias = r20
GR_N_as_int = r21
// Stacked registers ("alloc r32=ar.pfs,1,2,4,0": r32 input, r33-r34 locals,
// r35-r38 outputs).  r33 carries n on entry and is reused to save gp in the
// error path, after n has been copied to GR_N_as_int.
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
GR_SAVE_PFS = r34
GR_Parameter_X = r35
GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
//
// double ldexp (double x, int n): returns x * 2**n.
//   In:   f8  = x
//         r33 = n (only the low 32 bits are used)
//   Out:  f8  = result
//
//   x NaN/Inf/zero, or n == 0 : returns x*1+0 rounded to double
//   n >= 35000 / n <= -35000  : 2**n is replaced by 2**40000 / 2**-40000
//   overflow  : GR_Tag = 146, branch to L(LDEXP_OVERFLOW)
//   underflow : GR_Tag = 147, branch to L(LDEXP_UNDERFLOW)
//
// The product is computed three times: under s0 (the returned value),
// under s2 (WRE + TD, for overflow detection) and under s3 (FZ + TD, for
// underflow detection).
//
.align 32
.global ldexp
.section .text
.proc ldexp
.align 32
ldexp:
//
// Is x NAN, INF, ZERO, +-?
// Build the exponent Bias
//
{ .mfi
alloc r32=ar.pfs,1,2,4,0
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Bias = 0x0FFFF,r0
}
//
// Sign extend input
// Is N zero?
// Normalize x
//
// n is an int: the sxt4 below shows that the upper half of r33 is not
// trusted, so the zero test must be a 32-bit compare as well.
//
{ .mfi
cmp4.eq.unc p6,p0 = r33,r0
fnorm.s1 FR_Norm_X = FR_Floating_X
sxt4 GR_N_as_int = r33
}
;;
//
// Normalize x
// Branch and return special values.
// Create -35000
// Create 35000
//
{ .mfi
addl GR_Big = 35000,r0
nop.f 0
add GR_N_Biased = GR_Bias,GR_N_as_int
}
{ .mfb
addl GR_NBig = -35000,r0
(p7) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
(p7) br.ret.spnt b0
};;
//
// Build the exponent Bias
// Return x when N = 0
//
// 0x063BF = 0xFFFF - 40000 and 0x019C3F = 0xFFFF + 40000: biased exponents
// of the clamp factors 2**-40000 and 2**40000.
//
{ .mfi
setf.exp FR_Two_N = GR_N_Biased
nop.f 0
addl GR_Scratch1 = 0x063BF,r0
}
{ .mfb
addl GR_Scratch = 0x019C3F,r0
(p6) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
(p6) br.ret.spnt b0
};;
//
// Create 2**big
// Create 2**-big
// Is N > 35000
// Is N < -35000
// Raise Denormal operand flag with compare
// Main path, create 2**N
//
{ .mfi
setf.exp FR_NBig = GR_Scratch1
nop.f 0
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
}
{ .mfi
setf.exp FR_Big = GR_Scratch
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
};;
//
// Adjust 2**N if N was very small or very large
//
// The two movl instructions load the sign+exponent fields of the overflow
// thresholds: 0x303FF = -2**1024 and 0x103FF = +2**1024.
//
{ .mfi
nop.m 0
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch = 0x00000000000303FF
};;
{ .mfi
nop.m 0
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch1= 0x00000000000103FF
};;
// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
nop.m 999
(p0) fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
(p0) fsetc.s2 0x7F,0x42
nop.i 999
};;
//
// Do final operation
//
// FR_NBig and FR_Big are no longer needed as clamp factors here and are
// reloaded with the overflow thresholds.
//
{ .mfi
setf.exp FR_NBig = GR_Scratch
fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
{ .mfi
setf.exp FR_Big = GR_Scratch1
fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
//
// Check for overflow or underflow.
// Restore s3
// Restore s2
//
{ .mfi
nop.m 0
fsetc.s3 0x7F,0x40
nop.i 999
}
{ .mfi
nop.m 0
fsetc.s2 0x7F,0x40
nop.i 999
};;
//
// Is the result zero?
//
// p6 = flush-to-zero result is zero; p7 = wide-range result >= +threshold.
// GR_Tag defaults to the overflow tag.
//
{ .mfi
nop.m 999
fclass.m.unc p6, p0 = FR_Result3, 0x007
nop.i 999
}
{ .mfi
addl GR_Tag = 146, r0
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
nop.i 0
};;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
nop.m 999
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
nop.i 999
};;
//
// Is result bigger than the allowed range?
// Branch out for underflow
//
// p9 = wide-range result <= -threshold (negative overflow).
//
{ .mfb
(p6) addl GR_Tag = 147, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt L(LDEXP_UNDERFLOW)
};;
//
// Branch out for overflow
//
{ .mbb
nop.m 0
(p7) br.cond.spnt L(LDEXP_OVERFLOW)
(p9) br.cond.spnt L(LDEXP_OVERFLOW)
};;
//
// Return from main path.
//
{ .mfb
nop.m 999
nop.f 0
br.ret.sptk b0;;
}
.endp ldexp
ASM_SIZE_DIRECTIVE(ldexp)
// __libm_error_region (ldexp): shared overflow/underflow exit, entered by
// branch from ldexp with GR_Tag already set.
// It allocates a 64-byte frame and stores, relative to the new sp:
//   sp+16  FR_Norm_X (double)          address in GR_Parameter_X
//   sp+32  GR_N_as_int (8 bytes)       address in GR_Parameter_Y
//   sp+48  FR_Result (double)          address in GR_Parameter_RESULT
// It calls __libm_error_support, reloads FR_Result from the sp+48 slot,
// restores sp, b0, gp and ar.pfs, and returns.
.proc __libm_error_region
__libm_error_region:
L(LDEXP_OVERFLOW):
L(LDEXP_UNDERFLOW):
//
// Get stack address of N
//
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs
}
//
// Adjust sp
//
{ .mfi
.fframe 64
add sp=-64,sp
nop.f 0
mov GR_SAVE_GP=gp
};;
//
// Store N on stack in correct position
// Locate the address of x on stack
//
// The post-increment leaves GR_Parameter_Y pointing at sp+48.
//
{ .mmi
st8 [GR_Parameter_Y] = GR_N_as_int,16
add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0
};;
//
// Store x on the stack.
// Get address for result on stack.
//
.body
{ .mib
stfd [GR_Parameter_X] = FR_Norm_X
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
// Store the computed result, then move GR_Parameter_Y back to the slot
// that holds N (sp+32) for the call.
{ .mib
stfd [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
//
// Get the new result
//
{ .mmi
ldfd FR_Result = [GR_Parameter_RESULT]
.restore sp
add sp = 64,sp
mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
mov gp = GR_SAVE_GP
mov ar.pfs = GR_SAVE_PFS
br.ret.sptk b0
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// The error handler is an external function symbol.
.type __libm_error_support#,@function
.global __libm_error_support#

366
sysdeps/ia64/fpu/s_ldexpf.S Normal file
View File

@ -0,0 +1,366 @@
//.file "ldexpf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00 Initial version
// 1/26/01 ldexpf completely reworked and now standalone version
//
// API
//==============================================================
// float = ldexpf (float x, int n)
// input floating point f8 and int n (r33)
// output floating point f8
//
// Returns x* 2**n using an fma and detects overflow
// and underflow.
//
//
#include "libm_support.h"
// Register aliases for ldexpf and its error path.
//
// Floating-point registers.
// FR_Big / FR_NBig are used twice: first as the clamp factors 2**40000 and
// 2**-40000, then as the overflow thresholds +2**128 and -2**128.
FR_Big = f6
FR_NBig = f7
// Argument and result share f8.
FR_Floating_X = f8
FR_Result = f8
// The same product computed under status fields s2 and s3.
FR_Result2 = f9
FR_Result3 = f11
// Normalized copy of x; this is what the error path stores as "x".
FR_Norm_X = f12
FR_Two_N = f14
// Not referenced by ldexpf or its error path below.
FR_Two_to_Big = f15
// Scratch general registers.
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
GR_Scratch = r18
GR_Scratch1 = r19
GR_Bias = r20
GR_N_as_int = r21
// Stacked registers ("alloc r32=ar.pfs,1,2,4,0": r32 input, r33-r34 locals,
// r35-r38 outputs).  r33 carries n on entry and is reused to save gp in the
// error path, after n has been copied to GR_N_as_int.
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
GR_SAVE_PFS = r34
GR_Parameter_X = r35
GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
//
// float ldexpf (float x, int n): returns x * 2**n.
//   In:   f8  = x
//         r33 = n (only the low 32 bits are used)
//   Out:  f8  = result
//
//   x NaN/Inf/zero, or n == 0 : returns x*1+0 rounded to single
//   n >= 35000 / n <= -35000  : 2**n is replaced by 2**40000 / 2**-40000
//   overflow  : GR_Tag = 148, branch to L(ldexpf_OVERFLOW)
//   underflow : GR_Tag = 149, branch to L(ldexpf_UNDERFLOW)
//
// The product is computed three times: under s0 (the returned value),
// under s2 (WRE + TD, for overflow detection) and under s3 (FZ + TD, for
// underflow detection).
//
.align 32
.global ldexpf
.section .text
.proc ldexpf
.align 32
ldexpf:
//
// Is x NAN, INF, ZERO, +-?
// Build the exponent Bias
//
{ .mfi
alloc r32=ar.pfs,1,2,4,0
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Bias = 0x0FFFF,r0
}
//
// Sign extend input
// Is N zero?
// Normalize x
//
// n is an int: the sxt4 below shows that the upper half of r33 is not
// trusted, so the zero test must be a 32-bit compare as well.
//
{ .mfi
cmp4.eq.unc p6,p0 = r33,r0
fnorm.s1 FR_Norm_X = FR_Floating_X
sxt4 GR_N_as_int = r33
}
;;
//
// Normalize x
// Branch and return special values.
// Create -35000
// Create 35000
//
{ .mfi
addl GR_Big = 35000,r0
nop.f 0
add GR_N_Biased = GR_Bias,GR_N_as_int
}
{ .mfb
addl GR_NBig = -35000,r0
(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
(p7) br.ret.spnt b0
};;
//
// Build the exponent Bias
// Return x when N = 0
//
// 0x063BF = 0xFFFF - 40000 and 0x019C3F = 0xFFFF + 40000: biased exponents
// of the clamp factors 2**-40000 and 2**40000.
//
{ .mfi
setf.exp FR_Two_N = GR_N_Biased
nop.f 0
addl GR_Scratch1 = 0x063BF,r0
}
{ .mfb
addl GR_Scratch = 0x019C3F,r0
(p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
(p6) br.ret.spnt b0
};;
//
// Create 2**big
// Create 2**-big
// Is N > 35000
// Is N < -35000
// Raise Denormal operand flag with compare
// Main path, create 2**N
//
{ .mfi
setf.exp FR_NBig = GR_Scratch1
nop.f 0
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
}
{ .mfi
setf.exp FR_Big = GR_Scratch
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
};;
//
// Adjust 2**N if N was very small or very large
//
// The two movl instructions load the sign+exponent fields of the overflow
// thresholds: 0x3007F = -2**128 and 0x1007F = +2**128.
//
{ .mfi
nop.m 0
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch = 0x000000000003007F
};;
{ .mfi
nop.m 0
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch1= 0x000000000001007F
};;
// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
nop.m 999
(p0) fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
(p0) fsetc.s2 0x7F,0x42
nop.i 999
};;
//
// Do final operation
//
// FR_NBig and FR_Big are no longer needed as clamp factors here and are
// reloaded with the overflow thresholds.
//
{ .mfi
setf.exp FR_NBig = GR_Scratch
fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
{ .mfi
setf.exp FR_Big = GR_Scratch1
fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
// Check for overflow or underflow.
// Restore s3
// Restore s2
//
{ .mfi
nop.m 0
fsetc.s3 0x7F,0x40
nop.i 999
}
{ .mfi
nop.m 0
fsetc.s2 0x7F,0x40
nop.i 999
};;
//
// Is the result zero?
//
// p6 = flush-to-zero result is zero; p7 = wide-range result >= +threshold.
// GR_Tag defaults to the overflow tag.
//
{ .mfi
nop.m 999
fclass.m.unc p6, p0 = FR_Result3, 0x007
nop.i 999
}
{ .mfi
addl GR_Tag = 148, r0
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
nop.i 0
};;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
nop.m 999
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
nop.i 999
};;
//
// Is result bigger than the allowed range?
// Branch out for underflow
//
// p9 = wide-range result <= -threshold (negative overflow).
//
{ .mfb
(p6) addl GR_Tag = 149, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt L(ldexpf_UNDERFLOW)
};;
//
// Branch out for overflow
//
{ .mbb
nop.m 0
(p7) br.cond.spnt L(ldexpf_OVERFLOW)
(p9) br.cond.spnt L(ldexpf_OVERFLOW)
};;
//
// Return from main path.
//
{ .mfb
nop.m 999
nop.f 0
br.ret.sptk b0;;
}
.endp ldexpf
ASM_SIZE_DIRECTIVE(ldexpf)
// __libm_error_region (ldexpf): shared overflow/underflow exit, entered by
// branch from ldexpf with GR_Tag already set.
// It allocates a 64-byte frame and stores, relative to the new sp:
//   sp+16  FR_Norm_X (single)          address in GR_Parameter_X
//   sp+32  GR_N_as_int (8 bytes)       address in GR_Parameter_Y
//   sp+48  FR_Result (single)          address in GR_Parameter_RESULT
// It calls __libm_error_support, reloads FR_Result from the sp+48 slot,
// restores sp, b0, gp and ar.pfs, and returns.
.proc __libm_error_region
__libm_error_region:
L(ldexpf_OVERFLOW):
L(ldexpf_UNDERFLOW):
//
// Get stack address of N
//
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs
}
//
// Adjust sp
//
{ .mfi
.fframe 64
add sp=-64,sp
nop.f 0
mov GR_SAVE_GP=gp
};;
//
// Store N on stack in correct position
// Locate the address of x on stack
//
// The post-increment leaves GR_Parameter_Y pointing at sp+48.
//
{ .mmi
st8 [GR_Parameter_Y] = GR_N_as_int,16
add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0
};;
//
// Store x on the stack.
// Get address for result on stack.
//
.body
{ .mib
stfs [GR_Parameter_X] = FR_Norm_X
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
// Store the computed result, then move GR_Parameter_Y back to the slot
// that holds N (sp+32) for the call.
{ .mib
stfs [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
//
// Get the new result
//
{ .mmi
ldfs FR_Result = [GR_Parameter_RESULT]
.restore sp
add sp = 64,sp
mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
mov gp = GR_SAVE_GP
mov ar.pfs = GR_SAVE_PFS
br.ret.sptk b0
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// The error handler is an external function symbol.
.type __libm_error_support#,@function
.global __libm_error_support#

366
sysdeps/ia64/fpu/s_ldexpl.S Normal file
View File

@ -0,0 +1,366 @@
//.file "ldexpl.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00 Initial version
// 1/26/01 ldexpl completely reworked and now standalone version
//
// API
//==============================================================
// double-extended = ldexpl (double-extended x, int n)
// input floating point f8 and int n (r34)
// output floating point f8
//
// Returns x* 2**n using an fma and detects overflow
// and underflow.
//
//
#include "libm_support.h"
// Register aliases for ldexpl and its error path.
//
// Floating-point registers.
// FR_Big / FR_NBig are used twice: first as the clamp factors 2**40000 and
// 2**-40000, then as the overflow thresholds +2**16384 and -2**16384.
FR_Big = f6
FR_NBig = f7
// Argument and result share f8.
FR_Floating_X = f8
FR_Result = f8
// The same product computed under status fields s2 and s3.
FR_Result2 = f9
FR_Result3 = f11
// Normalized copy of x; this is what the error path stores as "x".
FR_Norm_X = f12
FR_Two_N = f14
// Not referenced by ldexpl or its error path below.
FR_Two_to_Big = f15
// Scratch general registers.
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
GR_Scratch = r18
GR_Scratch1 = r19
GR_Bias = r20
GR_N_as_int = r21
// Stacked registers ("alloc r32=ar.pfs,2,1,4,0": r32-r33 inputs, r34 local,
// r35-r38 outputs).  r34 carries n on entry and is reused to save ar.pfs in
// the error path, after n has been copied to GR_N_as_int.
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
GR_SAVE_PFS = r34
GR_Parameter_X = r35
GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
//
// double-extended ldexpl (double-extended x, int n): returns x * 2**n.
//   In:   f8  = x
//         r34 = n (only the low 32 bits are used)
//   Out:  f8  = result
//
//   x NaN/Inf/zero, or n == 0 : returns x*1+0
//   n >= 35000 / n <= -35000  : 2**n is replaced by 2**40000 / 2**-40000
//   overflow  : GR_Tag = 144, branch to L(ldexpl_OVERFLOW)
//   underflow : GR_Tag = 145, branch to L(ldexpl_UNDERFLOW)
//
// The product is computed three times: under s0 (the returned value),
// under s2 (WRE + TD, for overflow detection) and under s3 (FZ + TD, for
// underflow detection).
//
.align 32
.global ldexpl
.section .text
.proc ldexpl
.align 32
ldexpl:
//
// Is x NAN, INF, ZERO, +-?
// Build the exponent Bias
//
{ .mfi
alloc r32=ar.pfs,2,1,4,0
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Bias = 0x0FFFF,r0
}
//
// Sign extend input
// Is N zero?
// Normalize x
//
// n is an int: the sxt4 below shows that the upper half of r34 is not
// trusted, so the zero test must be a 32-bit compare as well.
//
{ .mfi
cmp4.eq.unc p6,p0 = r34,r0
fnorm.s1 FR_Norm_X = FR_Floating_X
sxt4 GR_N_as_int = r34
}
;;
//
// Normalize x
// Branch and return special values.
// Create -35000
// Create 35000
//
{ .mfi
addl GR_Big = 35000,r0
nop.f 0
add GR_N_Biased = GR_Bias,GR_N_as_int
}
{ .mfb
addl GR_NBig = -35000,r0
(p7) fma.s0 FR_Result = FR_Floating_X,f1, f0
(p7) br.ret.spnt b0
};;
//
// Build the exponent Bias
// Return x when N = 0
//
// 0x063BF = 0xFFFF - 40000 and 0x019C3F = 0xFFFF + 40000: biased exponents
// of the clamp factors 2**-40000 and 2**40000.
//
{ .mfi
setf.exp FR_Two_N = GR_N_Biased
nop.f 0
addl GR_Scratch1 = 0x063BF,r0
}
{ .mfb
addl GR_Scratch = 0x019C3F,r0
(p6) fma.s0 FR_Result = FR_Floating_X,f1, f0
(p6) br.ret.spnt b0
};;
//
// Create 2**big
// Create 2**-big
// Is N > 35000
// Is N < -35000
// Raise Denormal operand flag with compare
// Main path, create 2**N
//
{ .mfi
setf.exp FR_NBig = GR_Scratch1
nop.f 0
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
}
{ .mfi
setf.exp FR_Big = GR_Scratch
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
};;
//
// Adjust 2**N if N was very small or very large
//
// The two movl instructions load the sign+exponent fields of the overflow
// thresholds: 0x33FFF = -2**16384 and 0x13FFF = +2**16384.
//
{ .mfi
nop.m 0
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch = 0x0000000000033FFF
};;
{ .mfi
nop.m 0
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
(p0) movl GR_Scratch1= 0x0000000000013FFF
};;
// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
nop.m 999
(p0) fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
(p0) fsetc.s2 0x7F,0x42
nop.i 999
};;
//
// Do final operation
//
// FR_NBig and FR_Big are no longer needed as clamp factors here and are
// reloaded with the overflow thresholds.
//
{ .mfi
setf.exp FR_NBig = GR_Scratch
fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
nop.i 999
}
{ .mfi
nop.m 999
fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
{ .mfi
setf.exp FR_Big = GR_Scratch1
fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
// Check for overflow or underflow.
// Restore s3
// Restore s2
//
{ .mfi
nop.m 0
fsetc.s3 0x7F,0x40
nop.i 999
}
{ .mfi
nop.m 0
fsetc.s2 0x7F,0x40
nop.i 999
};;
//
// Is the result zero?
//
// p6 = flush-to-zero result is zero; p7 = wide-range result >= +threshold.
// GR_Tag defaults to the overflow tag.
//
{ .mfi
nop.m 999
fclass.m.unc p6, p0 = FR_Result3, 0x007
nop.i 999
}
{ .mfi
addl GR_Tag = 144, r0
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
nop.i 0
};;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
nop.m 999
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
nop.i 999
};;
//
// Is result bigger than the allowed range?
// Branch out for underflow
//
// p9 = wide-range result <= -threshold (negative overflow).
//
{ .mfb
(p6) addl GR_Tag = 145, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt L(ldexpl_UNDERFLOW)
};;
//
// Branch out for overflow
//
{ .mbb
nop.m 0
(p7) br.cond.spnt L(ldexpl_OVERFLOW)
(p9) br.cond.spnt L(ldexpl_OVERFLOW)
};;
//
// Return from main path.
//
{ .mfb
nop.m 999
nop.f 0
br.ret.sptk b0;;
}
.endp ldexpl
ASM_SIZE_DIRECTIVE(ldexpl)
// __libm_error_region (ldexpl): shared overflow/underflow exit, entered by
// branch from ldexpl with GR_Tag already set.
// It allocates a 64-byte frame and stores, relative to the new sp:
//   sp+16  FR_Norm_X (extended)        address in GR_Parameter_X
//   sp+32  GR_N_as_int (8 bytes)       address in GR_Parameter_Y
//   sp+48  FR_Result (extended)        address in GR_Parameter_RESULT
// It calls __libm_error_support, reloads FR_Result from the sp+48 slot,
// restores sp, b0, gp and ar.pfs, and returns.
.proc __libm_error_region
__libm_error_region:
L(ldexpl_OVERFLOW):
L(ldexpl_UNDERFLOW):
//
// Get stack address of N
//
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs
}
//
// Adjust sp
//
{ .mfi
.fframe 64
add sp=-64,sp
nop.f 0
mov GR_SAVE_GP=gp
};;
//
// Store N on stack in correct position
// Locate the address of x on stack
//
// The post-increment leaves GR_Parameter_Y pointing at sp+48.
//
{ .mmi
st8 [GR_Parameter_Y] = GR_N_as_int,16
add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0
};;
//
// Store x on the stack.
// Get address for result on stack.
//
.body
{ .mib
stfe [GR_Parameter_X] = FR_Norm_X
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
// Store the computed result, then move GR_Parameter_Y back to the slot
// that holds N (sp+32) for the call.
{ .mib
stfe [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
//
// Get the new result
//
{ .mmi
ldfe FR_Result = [GR_Parameter_RESULT]
.restore sp
add sp = 64,sp
mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
mov gp = GR_SAVE_GP
mov ar.pfs = GR_SAVE_PFS
br.ret.sptk b0
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// The error handler is an external function symbol.
.type __libm_error_support#,@function
.global __libm_error_support#

1614
sysdeps/ia64/fpu/s_log1p.S Normal file

File diff suppressed because it is too large Load Diff

1616
sysdeps/ia64/fpu/s_log1pf.S Normal file

File diff suppressed because it is too large Load Diff

1663
sysdeps/ia64/fpu/s_log1pl.S Normal file

File diff suppressed because it is too large Load Diff

314
sysdeps/ia64/fpu/s_logb.S Normal file
View File

@ -0,0 +1,314 @@
.file "logb.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00 Initial version
// 2/16/00 Modified to conform to C9X
// 3/16/00 Improved speed
// 4/04/00 Unwind support added
// 5/30/00 Fixed bug when x double-extended denormal
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//
// API
//==============================================================
// double logb( double x);
//
// Overview of operation
//==============================================================
// The logb function extracts the exponent of x as an integer in
// floating-point format.
// logb computes log2 of x as a double
//
// logb is similar to ilogb but differs in the following ways:
// +-inf
// ilogb: returns INT_MAX
// logb: returns +inf
// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
// ilogb: returns INT_MAX (7fffffff)
// logb: returns QNAN (quietized SNAN)
// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
// ilogb: returns -INT_MAX (80000001)
// logb: returns -inf, raises the divide-by-zero exception,
// and calls libm_error_support to set domain error
//
// Registers used
//==============================================================
// general registers used:
// ar.pfs r32
// r33 -> r37
// r38 -> r41 used as parameters to error path
//
// predicate registers used:
// p6, p7, p8
// floating-point registers used:
// f9, f10, f11
// f8, input
#include "libm_support.h"
// Symbolic register names used by the error path (__libm_error_region)
// that follows logb.  With "alloc r32=ar.pfs,1,5,4,0" the frame is r32
// input, r33-r37 locals, r38-r41 outputs.
// r34 and r35 hold the exponent mask and the extracted sign/exponent on the
// main path of logb; the error path reuses them to save b0 and gp.
GR_SAVE_B0 = r34
GR_SAVE_GP = r35
GR_SAVE_PFS = r32
// Output registers: addresses of the stacked x, y and result slots.  The
// fourth output, r41, is written directly as the error code in the error
// path.
GR_Parameter_X = r38
GR_Parameter_Y = r39
GR_Parameter_RESULT = r40
// logb: double logb(double x).
//   In:   f8 = x
//   Out:  f8 = unbiased binary exponent of x, converted to double
//   unnormal input -> L(LOGB_DENORM): same computation on the normalized
//                     copy f10, scaled by 2^64 if it is still unnormal
//   NaN / Inf      -> L(LOGB_NAN_INF): returns x*x
//   zero           -> L(LOGB_ZERO), located in __libm_error_region
// Predicates: p8 = unnormal, p6 = NaN/Inf, p7 = zero.
.align 32
.global logb#
.section .text
.proc logb#
.align 32
logb:
// qnan snan inf norm unorm 0 -+
// 0 0 0 0 1 0 11
// 0 b
{ .mfi
alloc r32=ar.pfs,1,5,4,0
(p0) fclass.m.unc p8,p0 = f8, 0x0b
nop.i 999
}
// X NORMAL
// r37 = exp(f8) - 0xffff
// sig(f9) = r37
// f8 = convert_to_fp (sig(f9))
{ .mfi
(p0) getf.exp r35 = f8
(p0) fnorm f10=f8
nop.i 999 ;;
}
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// r33 = exponent bias, r34 = mask that keeps the 17 exponent bits and
// drops the sign bit.
{ .mmf
(p0) mov r33 = 0xffff
(p0) mov r34 = 0x1ffff
(p0) fclass.m.unc p6,p0 = f8, 0xe3 ;;
}
// The main-path arithmetic runs unconditionally; the special cases branch
// away between its steps.
{ .mfb
(p0) and r36 = r35, r34
(p0) fclass.m.unc p7,p0 = f8, 0x07
(p8) br.cond.spnt L(LOGB_DENORM) ;;
}
{ .mib
(p0) sub r37 = r36, r33
nop.i 999
(p6) br.cond.spnt L(LOGB_NAN_INF) ;;
}
{ .mib
(p0) setf.sig f9 = r37
nop.i 999
(p7) br.cond.spnt L(LOGB_ZERO) ;;
}
// Convert the signed integer in f9 to floating point, then round to double.
{ .mfi
nop.m 999
(p0) fcvt.xf f10 = f9
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p0) fnorm.d f8 = f10
(p0) br.ret.sptk b0 ;;
}
L(LOGB_DENORM):
// Form signexp of 2^64 in case need to scale denormal
// Check to see if double-extended denormal
// (r38 is only a scratch register here; p8 is recomputed on the normalized
// copy f10)
{ .mfi
(p0) mov r38 = 0x1003f
(p0) fclass.m.unc p8,p0 = f10, 0x0b
nop.i 999 ;;
}
// Form 2^64 in case need to scale denormal
{ .mfi
(p0) setf.exp f11 = r38
nop.f 999
nop.i 999 ;;
}
// If double-extended denormal add 64 to exponent bias for scaling
// If double-extended denormal form x * 2^64 which is normal
{ .mfi
(p8) add r33 = 64, r33
(p8) fmpy f10 = f10, f11
nop.i 999 ;;
}
// Logic is the same as normal path but use normalized input
{ .mmi
(p0) getf.exp r35 = f10 ;;
nop.m 999
nop.i 999 ;;
}
{ .mmi
(p0) and r36 = r35, r34 ;;
(p0) sub r37 = r36, r33
nop.i 999 ;;
}
{ .mmi
(p0) setf.sig f9 = r37
nop.m 999
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p0) fcvt.xf f10 = f9
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p0) fnorm.d f8 = f10
(p0) br.ret.sptk b0 ;;
}
L(LOGB_NAN_INF):
// X NAN or INFINITY, return f8 * f8
{ .mfb
nop.m 999
(p0) fma.d f8= f8,f8,f0
(p0) br.ret.sptk b0 ;;
}
.endp logb#
ASM_SIZE_DIRECTIVE(logb)
// Stack operations when calling error support.
// (1) (2) (3) (call) (4)
// sp -> + psp -> + psp -> + sp -> +
// | | | |
// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
// | | | |
// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
// | | | |
// | | <- GR_X X1 ->| |
// | | | |
// sp-64 -> + sp -> + sp -> + +
// save ar.pfs save b0 restore gp
// save gp restore ar.pfs
// __libm_error_region (logb): zero-argument exit, entered by branch to
// L(LOGB_ZERO).
// It computes f9 = 1/(-|x|) with frcpa, sets r41 = 151 as the error code,
// allocates a 64-byte frame and stores, relative to the new sp:
//   sp+16  f8 (x, double)              address in GR_Parameter_X
//   sp+32  f0 (zero, double)           address in GR_Parameter_Y
//   sp+48  f9 (double)                 address in GR_Parameter_RESULT
// It calls __libm_error_support, reloads f8 from the sp+48 slot, restores
// sp, b0, gp and ar.pfs, and returns.
.proc __libm_error_region
__libm_error_region:
L(LOGB_ZERO):
.prologue
// f9 = |f8|
// f10 = -f9 = -|f8|
// f9 = 1.0/f10 = 1.0/-|f8|
{ .mfi
mov r41 = 151 // Error code
(p0) fmerge.s f9 = f0,f8
nop.i 999
}
;;
{ .mfi
nop.m 999
fmerge.ns f10 = f0,f9
nop.i 999
}
;;
// (1)
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
frcpa f9,p6 = f1,f10
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// (2)
// The post-increment leaves GR_Parameter_Y pointing at sp+48.
{ .mmi
stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
// (3)
{ .mib
stfd [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
// After the store, GR_Parameter_Y is moved back to sp+32 for the call.
{ .mib
stfd [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Recompute the result address after the call (see the 8/15/00 history
// entry: the register may have been overwritten by the callee).
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
// (4)
{ .mmi
ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// The error handler is an external function symbol.
.type __libm_error_support#,@function
.global __libm_error_support#

301
sysdeps/ia64/fpu/s_logbf.S Normal file
View File

@ -0,0 +1,301 @@
.file "logbf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00 Initial version
// 2/16/00 Modified to conform to C9X
// 3/16/00 Improved speed
// 4/04/00 Unwind support added
// 5/30/00 Fixed bug when x double-extended denormal
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//
// API
//==============================================================
// float logbf( float x);
//
// Overview of operation
//==============================================================
// The logbf function extracts the exponent of x as an integer in
// floating-point format.
// For finite nonzero x, logbf returns the unbiased binary exponent of x,
// i.e. floor(log2(|x|)), as a float
// logbf is similar to ilogbf but differs in the following ways:
// +-inf
// ilogbf: returns INT_MAX
// logbf: returns +inf
// NaN returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
// ilogbf: returns INT_MAX (7fffffff)
// logbf: returns QNAN (quietized SNAN)
// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
// ilogbf: returns -INT_MAX (80000001)
// logbf: returns -inf, raises the divide-by-zero exception,
// and calls libm_error_support to set domain error
//
// Registers used
//==============================================================
// general registers used:
// ar.pfs r32
// r33 -> r37
// r38 -> r41 used as parameters to error path
//
// predicate registers used:
// p6, p7, p8
//
// floating-point registers used:
// f9, f10, f11
// f8, input
#include "libm_support.h"
// Register aliases used by the error path (__libm_error_region).
// r32, r34 and r35 hold ar.pfs, b0 and gp while __libm_error_support runs.
GR_SAVE_B0 = r34
// NOTE(review): the comment that used to sit here said "r40 is address of
// table of coefficients", but this file has no coefficient table; r40 is
// only used as GR_Parameter_RESULT (see below).
GR_SAVE_PFS = r32
GR_SAVE_GP = r35
// r38-r41 are the four output registers declared by the alloc in logbf;
// they carry the arguments passed to __libm_error_support.
GR_Parameter_X = r38
GR_Parameter_Y = r39
GR_Parameter_RESULT = r40
GR_Parameter_TAG = r41
// Floating-point values stored to the stack for the error handler:
// the input x, a zero second argument, and the result computed for x == 0.
FR_X = f8
FR_Y = f0
FR_RESULT = f10
// float logbf(float x)
// In:  f8 = x
// Out: f8 = unbiased exponent of x as a float (normal and unnormalized x),
//      x*x for NaN or infinity.  Zero input runs into the error path that
//      follows this procedure.
// Uses r32-r38, p6-p8 and f9-f11.
.align 32
.global logbf#
.section .text
.proc logbf#
.align 32
logbf:
// qnan snan inf norm unorm 0 -+
// 0 0 0 0 1 0 11
// 0 b
// p8 = x is unnormalized (class mask 0x0b); alloc copies ar.pfs into r32
{ .mfi
alloc r32=ar.pfs,1,5,4,0
(p0) fclass.m.unc p8,p0 = f8, 0x0b
nop.i 999
}
// X NORMAL
// r37 = exp(f8) - 0xffff
// sig(f9) = r37
// f8 = convert_to_fp (sig(f9))
// r35 = sign/exponent field of x, f10 = normalized copy of x
{ .mfi
(p0) getf.exp r35 = f8
(p0) fnorm f10=f8
nop.i 999 ;;
}
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// r33 = exponent bias, r34 = exponent mask, p6 = x is NaN or infinity
{ .mmf
(p0) mov r33 = 0xffff
(p0) mov r34 = 0x1ffff
(p0) fclass.m.unc p6,p0 = f8, 0xe3 ;;
}
// r36 = biased exponent (r35 masked with 0x1ffff); p7 = x is zero (mask 0x07)
{ .mfb
(p0) and r36 = r35, r34
(p0) fclass.m.unc p7,p0 = f8, 0x07
(p8) br.cond.spnt L(LOGB_DENORM) ;;
}
// r37 = biased exponent - bias
{ .mib
(p0) sub r37 = r36, r33
nop.i 999
(p6) br.cond.spnt L(LOGB_NAN_INF) ;;
}
// Place the integer exponent in the significand of f9
{ .mib
(p0) setf.sig f9 = r37
nop.i 999
(p7) br.cond.spnt L(LOGB_ZERO) ;;
}
// Convert the integer in f9 to floating point
{ .mfi
nop.m 999
(p0) fcvt.xf f10 = f9
nop.i 999 ;;
}
// Deliver the result in single precision and return
{ .mfb
nop.m 999
(p0) fnorm.s f8 = f10
(p0) br.ret.sptk b0 ;;
}
L(LOGB_DENORM):
// Form signexp of 2^64 in case need to scale denormal
// Check to see if double-extended denormal
// (p8 is recomputed here on f10, the normalized copy of x made at entry;
// r38 is used as a scratch register on this path)
{ .mfi
(p0) mov r38 = 0x1003f
(p0) fclass.m.unc p8,p0 = f10, 0x0b
nop.i 999 ;;
}
// Form 2^64 in case need to scale denormal
{ .mfi
(p0) setf.exp f11 = r38
nop.f 999
nop.i 999 ;;
}
// If double-extended denormal add 64 to exponent bias for scaling
// If double-extended denormal form x * 2^64 which is normal
// (the larger bias in r33 cancels the 2^64 scale when it is subtracted below)
{ .mfi
(p8) add r33 = 64, r33
(p8) fmpy f10 = f10, f11
nop.i 999 ;;
}
// Logic is the same as normal path but use normalized input
{ .mmi
(p0) getf.exp r35 = f10 ;;
nop.m 999
nop.i 999 ;;
}
// r36 = biased exponent, r37 = r36 - (possibly adjusted) bias
{ .mmi
(p0) and r36 = r35, r34 ;;
(p0) sub r37 = r36, r33
nop.i 999 ;;
}
{ .mmi
(p0) setf.sig f9 = r37
nop.m 999
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p0) fcvt.xf f10 = f9
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p0) fnorm.s f8 = f10
(p0) br.ret.sptk b0 ;;
}
L(LOGB_NAN_INF):
// X NAN or INFINITY, return f8 * f8
// (f0 is the addend, so the result is just the single-precision product)
{ .mfb
nop.m 999
(p0) fma.s f8= f8,f8,f0
(p0) br.ret.sptk b0 ;;
}
L(LOGB_ZERO):
// X ZERO
// return -1.0/fabs(f8)=-inf, set divide-by-zero flag, call error support
// f9 = x with the sign of f0, i.e. fabs(x)
{ .mfi
nop.m 999
(p0) fmerge.s f9 = f0,f8
nop.i 999 ;;
}
// f10 = f9 with the negated sign of f0, i.e. -fabs(x)
{ .mfi
nop.m 999
(p0) fmerge.ns f10 = f0,f9
nop.i 999 ;;
}
// f10 = reciprocal of f10 taken from f1; f10 is FR_RESULT for the error path
{ .mfi
nop.m 999
(p0) frcpa f10,p6 = f1,f10
nop.i 999 ;;
}
// No branch ends this path: execution continues into the code placed after
// .endp, which is __libm_error_region.
.endp logbf
ASM_SIZE_DIRECTIVE(logbf)
// __libm_error_region: error path for logbf, entered by falling through
// from L(LOGB_ZERO).
// It creates a 64-byte stack frame, stores FR_X, FR_Y and FR_RESULT into it
// as single-precision values, and calls __libm_error_support with
//   GR_Parameter_X      = sp+16  (address of x)
//   GR_Parameter_Y      = sp+32  (address of y)
//   GR_Parameter_RESULT = sp+48  (address of result)
//   GR_Parameter_TAG    = 152
// After the call the value at sp+48 is loaded into f8, the frame is released,
// b0/gp/ar.pfs are restored, and control returns through b0.
.proc __libm_error_region
__libm_error_region:
.prologue
// GR_Parameter_Y is computed from the old sp: old sp - 32 == new sp + 32.
// NOTE(review): tag 152 presumably identifies the logbf zero-argument case
// to __libm_error_support; confirm against libm_support.h.
{ .mii
add GR_Parameter_Y=-32,sp // Parameter 2 value
(p0) mov GR_Parameter_TAG = 152
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// The store post-increments GR_Parameter_Y by 16, leaving it at sp+48
{ .mmi
stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
// GR_Parameter_RESULT takes the current GR_Parameter_Y (sp+48)
{ .mib
stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
// The result is stored at sp+48, then GR_Parameter_Y is moved back to sp+32
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Recompute the result address after the call (see the 8/15/00 history entry)
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// Declare the external error handler called above
.type __libm_error_support#,@function
.global __libm_error_support#

286
sysdeps/ia64/fpu/s_logbl.S Normal file
View File

@ -0,0 +1,286 @@
.file "logbl.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00 Initial version
// 2/16/00 Modified to conform to C9X
// 3/16/00 Improved speed
// 4/04/00 Unwind support added
// 5/30/00 Fixed bug when x double-extended denormal
// 8/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
//
// API
//==============================================================
// long double logbl( long double x);
//
// Overview of operation
//==============================================================
// The logbl function extracts the exponent of x as an integer in
// floating-point format.
// For finite nonzero x, logbl returns the unbiased binary exponent of x,
// i.e. floor(log2(|x|)), as a long double
//
// logbl is similar to ilogbl but differs in the following ways:
// +-inf
// ilogbl: returns INT_MAX
// logbl: returns +inf
// NaN returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
// ilogbl: returns INT_MAX (7fffffff)
// logbl: returns QNAN (quietized SNAN)
// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
// ilogbl: returns -INT_MAX (80000001)
// logbl: returns -inf, raises the divide-by-zero exception,
// and calls libm_error_support to set domain error
//
// Registers used
//==============================================================
// general registers used:
// ar.pfs r32
// r33 -> r37
// r38 -> r41 used as parameters to error path
//
// predicate registers used:
// p6, p7, p8
//
// floating-point registers used:
// f9, f10, f11
// f8, input
#include "libm_support.h"
// Register aliases used by the error path (__libm_error_region).
// r32, r34 and r35 hold ar.pfs, b0 and gp while __libm_error_support runs.
GR_SAVE_PFS = r32
GR_SAVE_B0 = r34
GR_SAVE_GP = r35
// r38-r41 are the four output registers declared by the alloc in logbl;
// they carry the arguments passed to __libm_error_support.
GR_Parameter_X = r38
GR_Parameter_Y = r39
GR_Parameter_RESULT = r40
GR_Parameter_TAG = r41
// Floating-point values stored to the stack for the error handler:
// the input x, a zero second argument, and the result computed for x == 0.
FR_X = f8
FR_Y = f0
FR_RESULT = f10
// long double logbl(long double x)
// In:  f8 = x
// Out: f8 = unbiased exponent of x in floating-point form (normal and
//      unnormalized x), x*x for NaN or infinity.  Zero input runs into the
//      error path that follows this procedure.
// Uses r32-r38, r41, p6-p8 and f9-f11.
.align 32
.global logbl#
.section .text
.proc logbl#
.align 32
logbl:
// qnan snan inf norm unorm 0 -+
// 0 0 0 0 1 0 11
// 0 b
// p8 = x is unnormalized (class mask 0x0b); alloc copies ar.pfs into r32
{ .mfi
alloc r32=ar.pfs,1,5,4,0
(p0) fclass.m.unc p8,p0 = f8, 0x0b
nop.i 999
}
// X NORMAL
// r37 = exp(f8) - 0xffff
// sig(f9) = r37
// f8 = convert_to_fp (sig(f9))
// r35 = sign/exponent field of x, f10 = normalized copy of x
{ .mfi
(p0) getf.exp r35 = f8
(p0) fnorm f10=f8
nop.i 999 ;;
}
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// r33 = exponent bias, r34 = exponent mask, p6 = x is NaN or infinity
{ .mmf
(p0) mov r33 = 0xffff
(p0) mov r34 = 0x1ffff
(p0) fclass.m.unc p6,p0 = f8, 0xe3 ;;
}
// r36 = biased exponent (r35 masked with 0x1ffff); p7 = x is zero (mask 0x07)
{ .mfb
(p0) and r36 = r35, r34
(p0) fclass.m.unc p7,p0 = f8, 0x07
(p8) br.cond.spnt L(LOGB_DENORM) ;;
}
// r37 = biased exponent - bias
{ .mib
(p0) sub r37 = r36, r33
nop.i 999
(p6) br.cond.spnt L(LOGB_NAN_INF) ;;
}
// Place the integer exponent in the significand of f9
{ .mib
(p0) setf.sig f9 = r37
nop.i 999
(p7) br.cond.spnt L(LOGB_ZERO) ;;
}
// Convert the integer in f9 to floating point
{ .mfi
nop.m 999
(p0) fcvt.xf f10 = f9
nop.i 999 ;;
}
// Normalize the result into f8 and return
{ .mfb
nop.m 999
(p0) fnorm f8 = f10
(p0) br.ret.sptk b0 ;;
}
L(LOGB_DENORM):
// Form signexp of 2^64 in case need to scale denormal
// Check to see if double-extended denormal
// (p8 is recomputed here on f10, the normalized copy of x made at entry;
// r38 is used as a scratch register on this path)
{ .mfi
(p0) mov r38 = 0x1003f
(p0) fclass.m.unc p8,p0 = f10, 0x0b
nop.i 999 ;;
}
// Form 2^64 in case need to scale denormal
{ .mfi
(p0) setf.exp f11 = r38
nop.f 999
nop.i 999 ;;
}
// If double-extended denormal add 64 to exponent bias for scaling
// If double-extended denormal form x * 2^64 which is normal
// (the larger bias in r33 cancels the 2^64 scale when it is subtracted below)
{ .mfi
(p8) add r33 = 64, r33
(p8) fmpy f10 = f10, f11
nop.i 999 ;;
}
// Logic is the same as normal path but use normalized input
{ .mmi
(p0) getf.exp r35 = f10 ;;
nop.m 999
nop.i 999 ;;
}
// r36 = biased exponent, r37 = r36 - (possibly adjusted) bias
{ .mmi
(p0) and r36 = r35, r34 ;;
(p0) sub r37 = r36, r33
nop.i 999 ;;
}
{ .mmi
(p0) setf.sig f9 = r37
nop.m 999
nop.i 999 ;;
}
{ .mfi
nop.m 999
(p0) fcvt.xf f10 = f9
nop.i 999 ;;
}
{ .mfb
nop.m 999
(p0) fnorm f8 = f10
(p0) br.ret.sptk b0 ;;
}
L(LOGB_NAN_INF):
// X NAN or INFINITY, return f8 * f8
// (f0 is the addend, so the result is just the product)
{ .mfb
nop.m 999
(p0) fma f8= f8,f8,f0
(p0) br.ret.sptk b0 ;;
}
L(LOGB_ZERO):
// f10 = reciprocal of f0 taken from f1 (1/0), computed under status field s0
{.mfi
nop.m 0
(p0) frcpa.s0 f10,p6 = f1,f0
nop.i 0
};;
// f10 = f0*f0 - f10, i.e. the negated reciprocal; also set the error tag.
// NOTE(review): tag 150 presumably identifies the logbl zero-argument case
// to __libm_error_support; confirm against libm_support.h.
{.mfi
mov GR_Parameter_TAG = 150
(p0) fms.s1 f10 = f0,f0,f10
nop.i 0
};;
// X ZERO
// return -1.0/fabs(f8)=-inf, set divide-by-zero flag, call error support
// No branch ends this path: execution continues into the code placed after
// .endp, which is __libm_error_region.
.endp logbl
ASM_SIZE_DIRECTIVE(logbl)
// __libm_error_region: error path for logbl, entered by falling through
// from L(LOGB_ZERO), which has already set GR_Parameter_TAG.
// It creates a 64-byte stack frame, stores FR_X, FR_Y and FR_RESULT into it
// with extended-precision stores, and calls __libm_error_support with
//   GR_Parameter_X      = sp+16  (address of x)
//   GR_Parameter_Y      = sp+32  (address of y)
//   GR_Parameter_RESULT = sp+48  (address of result)
// After the call the value at sp+48 is loaded into f8, the frame is released,
// b0/gp/ar.pfs are restored, and control returns through b0.
.proc __libm_error_region
__libm_error_region:
.prologue
// GR_Parameter_Y is computed from the old sp: old sp - 32 == new sp + 32
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
};;
// The store post-increments GR_Parameter_Y by 16, leaving it at sp+48
{ .mmi
stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
// GR_Parameter_RESULT takes the current GR_Parameter_Y (sp+48)
{ .mib
stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
// The result is stored at sp+48, then GR_Parameter_Y is moved back to sp+32
{ .mib
stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Recompute the result address after the call (see the 8/15/00 history entry)
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)
// Declare the external error handler called above
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -0,0 +1,33 @@
/* Derived from: */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "math.h"
#include "math_private.h"
#include "libm_support.h"
#ifdef __STDC__
int
weak_function
__matherrf(struct exceptionf *x)
#else
int
weak_function
__matherrf(x)
struct exceptionf *x;
#endif
{
  /* Default (weak) handler for the float error record: it returns 0 on
     every path.  arg1 is still compared with itself, as in the code this
     was derived from; if arg1 is a floating-point field that test can only
     be true for a NaN, and that case returns 0 as well.  */
  int status = 0;

  return (x->arg1 != x->arg1) ? 0 : status;
}
weak_alias (__matherrf, matherrf)

View File

@ -0,0 +1,33 @@
/* Derived from: */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "math.h"
#include "math_private.h"
#include "libm_support.h"
#ifdef __STDC__
int
weak_function
__matherrl(struct exceptionl *x)
#else
int
weak_function
__matherrl(x)
struct exceptionl *x;
#endif
{
  /* Default (weak) handler for the long double error record: it returns 0
     on every path.  arg1 is still compared with itself, as in the code this
     was derived from; if arg1 is a floating-point field that test can only
     be true for a NaN, and that case returns 0 as well.  */
  int status = 0;

  return (x->arg1 != x->arg1) ? 0 : status;
}
weak_alias (__matherrl, matherrl)

272
sysdeps/ia64/fpu/s_modf.S Normal file
View File

@ -0,0 +1,272 @@
.file "modf.s"
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//
// History
//==============================================================
// 2/02/00: Initial version
// 4/04/00: Improved speed, corrected result for NaN input
// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
// qnans nor for inputs larger than 2^63.
//
// API
//==============================================================
// double modf(double x, double *iptr)
// break a floating-point number x into its fractional and integral parts
//
// input floating point f8, address in r33
// output floating point f8 (x fraction), and *iptr (x integral part)
//
// OVERVIEW
//==============================================================
//
// NO FRACTIONAL PART: HUGE
// If
// for double-extended
// If the true exponent is greater than or equal 63
// 1003e ==> 1003e -ffff = 3f = 63(dec)
// for double
// If the true exponent is greater than or equal 52
// 10033 -ffff = 34 = 52(dec)
// for single
// If the true exponent is greater than or equal 23
// 10016 -ffff = 17 = 23(dec)
// then
// we are already an integer (p9 true)
// NO INTEGER PART: SMALL
// Is f8 exponent less than register bias (that is, is it
// less than 1). If it is, get the right sign of
// zero and store this in iptr.
// CALCULATION: NOT HUGE, NOT SMALL
// To get the integer part
// Take the floating-point input and truncate
// then convert this integer to fp Call it MODF_INTEGER_PART
// Subtract MODF_INTEGER_PART from MODF_NORM_F8 to get fraction part
// Then put fraction part in f8
// put integer part MODF_INTEGER_PART into *iptr
// Registers used
//==============================================================
// predicate registers used:
// p6 - p13
// 0xFFFF 0x10033
// -----------------------+-----------------+-------------
// SMALL | NORMAL | HUGE
// p11 --------------->|<----- p12 ----->| <-------------- p9
// p10 --------------------------------->|
// p13 --------------------------------------------------->|
//
#include "libm_support.h"
// floating-point registers used:
// f9  = normalized copy of the input
// f10 = alias declared for the fraction (the code below writes f8 directly)
// f11 = integer part in floating-point form (the value stored to *iptr)
// f12 = integer part as a truncated fixed-point value (fcvt.fx.trunc result)
MODF_NORM_F8 = f9
MODF_FRACTION_PART = f10
MODF_INTEGER_PART = f11
MODF_INT_INTEGER_PART = f12
// general registers used
// r14 = sign/exponent field from getf.exp
// r15 = biased exponent 0x10033, at or above which x has no fraction bits
// r16 = exponent bias 0xffff
// r17 = exponent mask 0x1ffff
// r18 = masked (biased) exponent of x
modf_signexp = r14
modf_GR_no_frac = r15
modf_GR_FFFF = r16
modf_17_ones = r17
modf_exp = r18
// r33 = iptr
// double modf(double x, double *iptr)
// In:  f8 = x, r33 = iptr
// Out: f8 = fractional part of x, *iptr = integral part of x
// Scratch: r14-r18, f9, f11, f12, p6-p13.
// The routine issues no alloc, makes no call and does not touch sp.
// The biased exponent selects one of three cases:
//   SMALL  (p11): exponent <  0xffff  -> fraction = x, integer = signed 0
//   NORMAL (p12): otherwise           -> integer = trunc(x), fraction = x - integer
//   HUGE   (p9):  exponent >= 0x10033 -> integer = x, fraction = signed 0
// NaN and infinity (p6/p7) and unnormalized input (p8) are handled separately.
.align 32
.global modf#
.section .text
.proc modf#
.align 32
// Main path is p9, p11, p8 FALSE and p12 TRUE
// Assume input is normalized and get signexp
// Normalize input just in case
// Form exponent bias
modf:
{ .mfi
getf.exp modf_signexp = f8
fnorm MODF_NORM_F8 = f8
addl modf_GR_FFFF = 0xffff, r0
}
// Get integer part of input
// Form exponent mask
{ .mfi
nop.m 999
fcvt.fx.trunc.s1 MODF_INT_INTEGER_PART = f8
mov modf_17_ones = 0x1ffff ;;
}
// Is x nan or inf?
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11 = 0xe3 NAN_INF
// Form biased exponent where input only has an integer part
// (p6 = NaN or infinity, p13 = everything else)
{ .mfi
nop.m 999
fclass.m.unc p6,p13 = f8, 0xe3
addl modf_GR_no_frac = 0x10033, r0 ;;
}
// Mask to get exponent
// Is x unnorm?
// qnan snan inf norm unorm 0 -+
// 0 0 0 0 1 0 11 = 0x0b UNORM
// Set p13 to indicate calculation path, else p6 if nan or inf
{ .mfi
and modf_exp = modf_17_ones, modf_signexp
fclass.m.unc p8,p0 = f8, 0x0b
nop.i 999 ;;
}
// p11 <== SMALL, no integer part, fraction is everything
// p9 <== HUGE, no fraction part, integer is everything
// p12 <== NORMAL, fraction part and integer part
{ .mii
(p13) cmp.lt.unc p11,p10 = modf_exp, modf_GR_FFFF
nop.i 999
nop.i 999 ;;
}
// Is x inf? p6 if inf, p7 if nan
// (the compare splits the not-SMALL case p10 into HUGE p9 and NORMAL p12)
{ .mfb
(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
(p6) fclass.m.unc p6,p7 = f8, 0x23
(p8) br.cond.spnt L(MODF_DENORM) ;;
}
L(MODF_COMMON):
// For HUGE set fraction to signed 0
{ .mfi
nop.m 999
(p9) fmerge.s f8 = f8,f0
nop.i 999
}
// For HUGE set integer part to normalized input
{ .mfi
nop.m 999
(p9) fnorm.d MODF_INTEGER_PART = MODF_NORM_F8
nop.i 999 ;;
}
// For SMALL set fraction to normalized input, integer part to signed 0
{ .mfi
nop.m 999
(p11) fmerge.s MODF_INTEGER_PART = f8,f0
nop.i 999
}
{ .mfi
nop.m 999
(p11) fnorm.d f8 = MODF_NORM_F8
nop.i 999 ;;
}
// For NORMAL float the integer part
{ .mfi
nop.m 999
(p12) fcvt.xf MODF_INTEGER_PART = MODF_INT_INTEGER_PART
nop.i 999 ;;
}
// If x inf set integer part to INF, fraction to signed 0
{ .mfi
(p6) stfd [r33] = MODF_NORM_F8
(p6) fmerge.s f8 = f8,f0
nop.i 999 ;;
}
// If x nan set integer and fraction parts to NaN (quietized)
{ .mfi
(p7) stfd [r33] = MODF_NORM_F8
(p7) fmerge.s f8 = MODF_NORM_F8, MODF_NORM_F8
nop.i 999 ;;
}
// For HUGE store the integer part to *iptr
{ .mmi
(p9) stfd [r33] = MODF_INTEGER_PART
nop.m 999
nop.i 999 ;;
}
// For NORMAL compute fraction part
// (the same bundle stores the SMALL integer part, a signed 0, to *iptr)
{ .mfi
(p11) stfd [r33] = MODF_INTEGER_PART
(p12) fms.d.s0 f8 = MODF_NORM_F8,f1, MODF_INTEGER_PART
nop.i 999 ;;
}
// For NORMAL test if fraction part is zero; if so append correct sign
// NOTE(review): p7 is reused here; as a .unc compare this should also clear
// p7 when p12 is false, so the NaN setting of p7 above does not reach the
// final fmerge -- confirm against the fcmp definition.
{ .mfi
nop.m 999
(p12) fcmp.eq.unc p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
nop.i 999 ;;
}
// For NORMAL store the integer part to *iptr
{ .mfi
(p12) stfd [r33] = MODF_INTEGER_PART
nop.f 999
nop.i 999 ;;
}
// For NORMAL if fraction part is zero append sign of input
{ .mfb
nop.m 999
(p7) fmerge.s f8 = MODF_NORM_F8, f0
br.ret.sptk b0 ;;
}
L(MODF_DENORM):
// If x unorm get signexp from normalized input
// If x unorm get integer part from normalized input
{ .mfi
getf.exp modf_signexp = MODF_NORM_F8
fcvt.fx.trunc.s1 MODF_INT_INTEGER_PART = MODF_NORM_F8
nop.i 999 ;;
}
// If x unorm mask to get exponent
// (then redo the SMALL test that the main path made on the raw input)
{ .mmi
and modf_exp = modf_17_ones, modf_signexp ;;
cmp.lt.unc p11,p10 = modf_exp, modf_GR_FFFF
nop.i 999 ;;
}
// Redo the HUGE/NORMAL split and rejoin the common code
{ .mfb
(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
nop.f 999
br.cond.spnt L(MODF_COMMON) ;;
}
.endp modf
ASM_SIZE_DIRECTIVE(modf)

Some files were not shown because too many files have changed in this diff Show More