Optimize sparc {ceil,floor}{,f} using vis2 'siam' instruction.

* sysdeps/sparc/sparc-ifunc.h (SPARC_ASM_IFUNC2): New macro.
	(SPARC_ASM_VIS2_IFUNC): Likewise.
	(SPARC_ASM_VIS3_VIS2_IFUNC): Likewise.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S: Make
	use of 'siam' instruction.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S:
	Likewise.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S:
	Likewise.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S:
	Likewise.
	* sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S: Likewise.
	* sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S: Likewise.
	* sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S: Likewise.
	* sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S: Likewise.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S: New
	file.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S: New
	file.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S: New
	file.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S: New
	file.
	* sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S: New file.
	* sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S: New file.
	* sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S: New file.
	* sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S: New file.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S: Hook in
	new VIS2 routines.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S: Likewise.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S: Likewise.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S:
	Likewise.
	* sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S: Likewise.
	* sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S: Likewise.
	* sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S: Likewise.
	* sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S: Likewise.
	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile: Add new VIS2
	routines to libm-sysdep_routines.
	* sysdeps/sparc/sparc64/fpu/multiarch/Makefile: Likewise.
This commit is contained in:
David S. Miller 2013-01-14 21:47:29 -08:00
parent 65a82e3dd5
commit 8b954ab9b8
28 changed files with 670 additions and 223 deletions

View File

@ -1,5 +1,46 @@
2013-01-14 David S. Miller <davem@davemloft.net> 2013-01-14 David S. Miller <davem@davemloft.net>
* sysdeps/sparc/sparc-ifunc.h (SPARC_ASM_IFUNC2): New macro.
(SPARC_ASM_VIS2_IFUNC): Likewise.
(SPARC_ASM_VIS3_VIS2_IFUNC): Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S: Make
use of 'siam' instruction.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S:
Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S:
Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S:
Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S: Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S: Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S: Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S: Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S: New
file.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S: New
file.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S: New
file.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S: New
file.
* sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S: New file.
* sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S: New file.
* sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S: New file.
* sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S: New file.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S: Hook in
new VIS2 routines.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S: Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S: Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S:
Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S: Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S: Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S: Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S: Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile: Add new VIS2
routines to libm-sysdep_routines.
* sysdeps/sparc/sparc64/fpu/multiarch/Makefile: Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile: Add vis3 * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile: Add vis3
fdim/fdimf to libm-sysdep_routines. fdim/fdimf to libm-sysdep_routines.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim-vis3.S: New * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim-vis3.S: New

View File

@ -51,6 +51,33 @@ ENTRY (__##name) \
mov %o1, %o0; \ mov %o1, %o0; \
END (__##name) END (__##name)
/* SPARC_ASM_IFUNC2 (PIC variant): emit the indirect-function resolver
   __<name> that picks one of three implementations.

   %o0 is ANDed with mask m1; if any bit matches, f1 is chosen.
   Otherwise %o0 is ANDed with mask m2 and, on a match, f2 is chosen.
   If neither mask matches, dflt is chosen.  Masks are therefore tested
   in priority order, m1 first.
   NOTE(review): %o0 is presumably the hwcap word handed to the
   resolver by its caller -- confirm against the IFUNC calling code.

   The chosen symbol's %gdop_hix22/%gdop_lox10 value is built in %o1,
   added to %o3 (which SETUP_PIC_REG_LEAF(o3, o5) initializes) at local
   label 10, and returned in %o0 from the delay slot of retl.
   Registers written: %o0, %o1, %o3, the integer condition codes, plus
   whatever SETUP_PIC_REG_LEAF itself uses (it is given o5 as a second
   register).  */
# define SPARC_ASM_IFUNC2(name, m1, f1, m2, f2, dflt) \
ENTRY (__##name) \
.type __##name, @gnu_indirect_function; \
SETUP_PIC_REG_LEAF(o3, o5); \
set m1, %o1; \
andcc %o0, %o1, %g0; \
be 8f; \
nop; \
sethi %gdop_hix22(f1), %o1; \
xor %o1, %gdop_lox10(f1), %o1; \
ba 10f; \
nop; \
8: set m2, %o1; \
andcc %o0, %o1, %g0; \
be 9f; \
nop; \
sethi %gdop_hix22(f2), %o1; \
xor %o1, %gdop_lox10(f2), %o1; \
ba 10f; \
nop; \
9: sethi %gdop_hix22(dflt), %o1; \
xor %o1, %gdop_lox10(dflt), %o1; \
10: add %o3, %o1, %o1; \
retl; \
mov %o1, %o0; \
END (__##name)
# else /* SHARED */ # else /* SHARED */
# ifdef __arch64__ # ifdef __arch64__
@ -82,19 +109,54 @@ ENTRY (__##name) \
mov %o1, %o0; \ mov %o1, %o0; \
END (__##name) END (__##name)
/* SPARC_ASM_IFUNC2 (non-PIC variant): emit the indirect-function
   resolver __<name> that picks one of three implementations.

   %o0 is ANDed with mask m1; if any bit matches, f1 is chosen.
   Otherwise %o0 is ANDed with mask m2 and, on a match, f2 is chosen.
   If neither mask matches, dflt is chosen.
   NOTE(review): %o0 is presumably the hwcap word handed to the
   resolver by its caller -- confirm against the IFUNC calling code.

   The chosen symbol is placed in %o1 by SET (sym, %g1, %o1) and
   copied to %o0 in the delay slot of retl.
   NOTE(review): SET is defined outside this view; it presumably
   uses %g1 as a scratch register -- confirm.  */
# define SPARC_ASM_IFUNC2(name, m1, f1, m2, f2, dflt) \
ENTRY (__##name) \
.type __##name, @gnu_indirect_function; \
set m1, %o1; \
andcc %o0, %o1, %g0; \
be 8f; \
nop; \
SET(f1, %g1, %o1); \
ba 10f; \
nop; \
8: set m2, %o1; \
andcc %o0, %o1, %g0; \
be 9f; \
nop; \
SET(f2, %g1, %o1); \
ba 10f; \
nop; \
9: SET(dflt, %g1, %o1); \
10: retl; \
mov %o1, %o0; \
END (__##name)
# endif /* SHARED */ # endif /* SHARED */
/* Resolver for routines that have a VIS2 variant only: expands
   SPARC_ASM_IFUNC1 with the HWCAP_SPARC_VIS2 mask, __<name>_vis2 and
   __<name>_generic.
   NOTE(review): SPARC_ASM_IFUNC1 is defined outside this view;
   presumably __<name>_vis2 is chosen when the mask matches and
   __<name>_generic otherwise -- confirm.  */
#define SPARC_ASM_VIS2_IFUNC(name) \
SPARC_ASM_IFUNC1(name, HWCAP_SPARC_VIS2, \
__##name##_vis2, __##name##_generic)
# ifdef HAVE_AS_VIS3_SUPPORT # ifdef HAVE_AS_VIS3_SUPPORT
#define SPARC_ASM_VIS3_IFUNC(name) \ #define SPARC_ASM_VIS3_IFUNC(name) \
SPARC_ASM_IFUNC1(name, HWCAP_SPARC_VIS3, \ SPARC_ASM_IFUNC1(name, HWCAP_SPARC_VIS3, \
__##name##_vis3, __##name##_generic) __##name##_vis3, __##name##_generic)
/* Resolver for routines that have both VIS3 and VIS2 variants:
   expands SPARC_ASM_IFUNC2 with HWCAP_SPARC_VIS3 / __<name>_vis3 as
   the first mask/function pair, HWCAP_SPARC_VIS2 / __<name>_vis2 as
   the second pair, and __<name>_generic as the default.  */
#define SPARC_ASM_VIS3_VIS2_IFUNC(name) \
SPARC_ASM_IFUNC2(name, HWCAP_SPARC_VIS3, \
__##name##_vis3, \
HWCAP_SPARC_VIS2, \
__##name##_vis2, __##name##_generic)
# else /* HAVE_AS_VIS3_SUPPORT */ # else /* HAVE_AS_VIS3_SUPPORT */
#define SPARC_ASM_VIS3_IFUNC(name) \ #define SPARC_ASM_VIS3_IFUNC(name) \
SPARC_ASM_IFUNC_DFLT(name, __##name##_generic) SPARC_ASM_IFUNC_DFLT(name, __##name##_generic)
/* HAVE_AS_VIS3_SUPPORT is not defined in this branch, so no
   __<name>_vis3 symbol is referenced: the VIS3+VIS2 resolver
   simply expands to the VIS2-only resolver.  */
#define SPARC_ASM_VIS3_VIS2_IFUNC(name) \
SPARC_ASM_VIS2_IFUNC(name)
# endif /* HAVE_AS_VIS3_SUPPORT */ # endif /* HAVE_AS_VIS3_SUPPORT */

View File

@ -1,4 +1,6 @@
ifeq ($(subdir),math) ifeq ($(subdir),math)
libm-sysdep_routines += s_ceil-vis2 s_ceilf-vis2 \
s_floor-vis2 s_floorf-vis2
ifeq ($(have-as-vis3),yes) ifeq ($(have-as-vis3),yes)
libm-sysdep_routines += m_copysignf-vis3 m_copysign-vis3 s_ceilf-vis3 \ libm-sysdep_routines += m_copysignf-vis3 m_copysign-vis3 s_ceilf-vis3 \
s_ceil-vis3 s_fabs-vis3 s_fabsf-vis3 s_floor-vis3 \ s_ceil-vis3 s_fabs-vis3 s_fabsf-vis3 s_floor-vis3 \

View File

@ -0,0 +1,61 @@
/* ceil function, sparc32 v9 vis2 version.
Copyright (C) 2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller <davem@davemloft.net>, 2013.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
the rounding mode during this routine.
We add 2**52 to the value (or subtract it, if the initial
value was negative), then undo that operation to take it
back out.
This will clear out the fractional portion of the value and,
with suitable 'siam' initiated rounding mode settings, round
the final result in the proper direction. */
#define TWO_FIFTYTWO 0x43300000 /* 2**52 */
#define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */
/* ceil for double, sparc32 v9 with VIS2.
   The argument arrives as two 32-bit words in %o0 (upper) and %o1
   (lower); the result is left in %f0.  Rounding is done by adding and
   then subtracting a sign-matched 2**52 while 'siam' overrides the
   rounding mode.  The stack slot at %sp + 72 is used as scratch.  */
ENTRY (__ceil_vis2)
/* %o2 = 0x43300000, the upper word of the double 2**52 (the low
   10 bits of the constant are zero, so sethi supplies all of it).  */
sethi %hi(TWO_FIFTYTWO), %o2
/* Merge %o0 (moved to the upper half) and %o1 into one 64-bit value.  */
sllx %o0, 32, %o0
or %o0, %o1, %o0
/* Bounce the argument through the stack to get it into %f0.  */
stx %o0, [%sp + 72]
/* Move the constant to the upper half: %o2 is now the bit pattern
   of the double 2**52.  */
sllx %o2, 32, %o2
fzero ZERO
ldd [%sp + 72], %f0
/* SIGN_BIT = -0.0, i.e. only the sign bit set.  */
fnegd ZERO, SIGN_BIT
/* Reuse the same stack slot to move 2**52 into %f16.  */
stx %o2, [%sp + 72]
/* %f14 = |x|.  */
fabsd %f0, %f14
ldd [%sp + 72], %f16
/* If |x| >= 2**52, or the compare is unordered, replace the constant
   with 0.0 so the add/subtract below do not disturb a value that has
   no fractional bits (or a NaN).  */
fcmpd %fcc3, %f14, %f16
fmovduge %fcc3, ZERO, %f16
/* SIGN_BIT = sign bit of x; then give the constant the sign of x.  */
fand %f0, SIGN_BIT, SIGN_BIT
for %f16, SIGN_BIT, %f16
/* NOTE(review): the siam operand presumably encodes an enable bit
   (1 << 2) plus a 2-bit rounding direction; 2 is presumably "toward
   +infinity" and 0 "to nearest" -- confirm against the VIS2 manual.  */
siam (1 << 2) | 2
faddd %f0, %f16, %f18
siam (1 << 2) | 0
fsubd %f18, %f16, %f18
/* Operand 0: drop the rounding mode override again.  */
siam (0 << 2)
retl
/* Delay slot of retl: OR the sign of x back in and place the result
   in %f0.  */
for %f18, SIGN_BIT, %f0
END (__ceil_vis2)

View File

@ -19,27 +19,21 @@
#include <sysdep.h> #include <sysdep.h>
/* Since changing the rounding mode is extremely expensive, we /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
try to round up using a method that is rounding mode the rounding mode during this routine.
agnostic.
We add then subtract (or subtract than add if the initial We add then subtract (or subtract than add if the initial
value was negative) 2**23 to the value, then subtract it value was negative) 2**23 to the value, then subtract it
back out. back out.
This will clear out the fractional portion of the value. This will clear out the fractional portion of the value and,
One of two things will happen for non-whole initial values. with suitable 'siam' initiated rouding mode settings, round
Either the rounding mode will round it up, or it will be the final result in the proper direction.
rounded down. If the value started out whole, it will be
equal after the addition and subtraction. This means we
can accurately detect with one test whether we need to add
another 1.0 to round it up properly.
VIS instructions are used to facilitate the formation of We also use VIS3 moves to avoid using the stack to transfer
easier constants, and the propagation of the sign bit. */ values between float and integer registers. */
#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ #define TWO_FIFTYTWO 0x43300000 /* 2**52 */
#define ONE_DOT_ZERO 0x3ff00000 /* 1.0 */
#define ZERO %f10 /* 0.0 */ #define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */ #define SIGN_BIT %f12 /* -0.0 */
@ -47,32 +41,22 @@
ENTRY (__ceil_vis3) ENTRY (__ceil_vis3)
sethi %hi(TWO_FIFTYTWO), %o2 sethi %hi(TWO_FIFTYTWO), %o2
sllx %o0, 32, %o0 sllx %o0, 32, %o0
sethi %hi(ONE_DOT_ZERO), %o3 sllx %o2, 32, %o2
or %o0, %o1, %o0 or %o0, %o1, %o0
movxtod %o0, %f0 movxtod %o0, %f0
sllx %o2, 32, %o2
fzero ZERO fzero ZERO
sllx %o3, 32, %o3
fnegd ZERO, SIGN_BIT fnegd ZERO, SIGN_BIT
movxtod %o2, %f16 movxtod %o2, %f16
fabsd %f0, %f14 fabsd %f0, %f14
fcmpd %fcc3, %f14, %f16 fcmpd %fcc3, %f14, %f16
fmovduge %fcc3, ZERO, %f16 fmovduge %fcc3, ZERO, %f16
fand %f0, SIGN_BIT, SIGN_BIT fand %f0, SIGN_BIT, SIGN_BIT
for %f16, SIGN_BIT, %f16 for %f16, SIGN_BIT, %f16
siam (1 << 2) | 2
faddd %f0, %f16, %f18 faddd %f0, %f16, %f18
siam (1 << 2) | 0
fsubd %f18, %f16, %f18 fsubd %f18, %f16, %f18
fcmpd %fcc2, %f18, %f0 siam (0 << 2)
movxtod %o3, %f20
fmovduge %fcc2, ZERO, %f20
faddd %f18, %f20, %f0
fabsd %f0, %f0
retl retl
for %f0, SIGN_BIT, %f0 for %f18, SIGN_BIT, %f0
END (__ceil_vis3) END (__ceil_vis3)

View File

@ -1,7 +1,7 @@
#include <sparc-ifunc.h> #include <sparc-ifunc.h>
#include <math_ldbl_opt.h> #include <math_ldbl_opt.h>
SPARC_ASM_VIS3_IFUNC(ceil) SPARC_ASM_VIS3_VIS2_IFUNC(ceil)
weak_alias (__ceil, ceil) weak_alias (__ceil, ceil)

View File

@ -0,0 +1,58 @@
/* Float ceil function, sparc32 v9 vis2 version.
Copyright (C) 2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller <davem@davemloft.net>, 2013.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
the rounding mode during this routine.
We add 2**23 to the value (or subtract it, if the initial
value was negative), then undo that operation to take it
back out.
This will clear out the fractional portion of the value and,
with suitable 'siam' initiated rounding mode settings, round
the final result in the proper direction. */
#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */
#define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */
/* ceil for float, sparc32 v9 with VIS2.
   The argument arrives in %o0; the result is left in %f0.  Rounding
   is done by adding and then subtracting a sign-matched 2**23 while
   'siam' overrides the rounding mode.  The stack slot at %sp + 68 is
   used as scratch.  */
ENTRY (__ceilf_vis2)
/* Bounce the argument through the stack to get it into %f0.  */
st %o0, [%sp + 68]
/* %o2 = 0x4b000000, the bit pattern of the float 2**23 (the low
   10 bits of the constant are zero, so sethi supplies all of it).  */
sethi %hi(TWO_TWENTYTHREE), %o2
fzeros ZERO
ld [%sp + 68], %f0
/* SIGN_BIT = -0.0, i.e. only the sign bit set.  */
fnegs ZERO, SIGN_BIT
/* Reuse the same stack slot to move 2**23 into %f16.  */
st %o2, [%sp + 68]
/* %f14 = |x|.  */
fabss %f0, %f14
ld [%sp + 68], %f16
/* If |x| >= 2**23, or the compare is unordered, replace the constant
   with 0.0 so the add/subtract below do not disturb a value that has
   no fractional bits (or a NaN).  */
fcmps %fcc3, %f14, %f16
fmovsuge %fcc3, ZERO, %f16
/* SIGN_BIT = sign bit of x; then give the constant the sign of x.  */
fands %f0, SIGN_BIT, SIGN_BIT
fors %f16, SIGN_BIT, %f16
/* NOTE(review): the siam operand presumably encodes an enable bit
   (1 << 2) plus a 2-bit rounding direction; 2 is presumably "toward
   +infinity" and 0 "to nearest" -- confirm against the VIS2 manual.  */
siam (1 << 2) | 2
fadds %f0, %f16, %f1
siam (1 << 2) | 0
fsubs %f1, %f16, %f1
/* Operand 0: drop the rounding mode override again.  */
siam (0 << 2)
retl
/* Delay slot of retl: OR the sign of x back in and place the result
   in %f0.  */
fors %f1, SIGN_BIT, %f0
END (__ceilf_vis2)

View File

@ -19,27 +19,21 @@
#include <sysdep.h> #include <sysdep.h>
/* Since changing the rounding mode is extremely expensive, we /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
try to round up using a method that is rounding mode the rounding mode during this routine.
agnostic.
We add then subtract (or subtract than add if the initial We add then subtract (or subtract than add if the initial
value was negative) 2**23 to the value, then subtract it value was negative) 2**23 to the value, then subtract it
back out. back out.
This will clear out the fractional portion of the value. This will clear out the fractional portion of the value and,
One of two things will happen for non-whole initial values. with suitable 'siam' initiated rouding mode settings, round
Either the rounding mode will round it up, or it will be the final result in the proper direction.
rounded down. If the value started out whole, it will be
equal after the addition and subtraction. This means we
can accurately detect with one test whether we need to add
another 1.0 to round it up properly.
VIS instructions are used to facilitate the formation of We also use VIS3 moves to avoid using the stack to transfer
easier constants, and the propagation of the sign bit. */ values between float and integer registers. */
#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ #define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */
#define ONE_DOT_ZERO 0x3f800000 /* 1.0 */
#define ZERO %f10 /* 0.0 */ #define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */ #define SIGN_BIT %f12 /* -0.0 */
@ -47,28 +41,19 @@
ENTRY (__ceilf_vis3) ENTRY (__ceilf_vis3)
movwtos %o0, %f0 movwtos %o0, %f0
sethi %hi(TWO_TWENTYTHREE), %o2 sethi %hi(TWO_TWENTYTHREE), %o2
sethi %hi(ONE_DOT_ZERO), %o3
fzeros ZERO fzeros ZERO
fnegs ZERO, SIGN_BIT fnegs ZERO, SIGN_BIT
movwtos %o2, %f16 movwtos %o2, %f16
fabss %f0, %f14 fabss %f0, %f14
fcmps %fcc3, %f14, %f16 fcmps %fcc3, %f14, %f16
fmovsuge %fcc3, ZERO, %f16 fmovsuge %fcc3, ZERO, %f16
fands %f0, SIGN_BIT, SIGN_BIT fands %f0, SIGN_BIT, SIGN_BIT
fors %f16, SIGN_BIT, %f16 fors %f16, SIGN_BIT, %f16
siam (1 << 2) | 2
fadds %f0, %f16, %f1 fadds %f0, %f16, %f1
siam (1 << 2) | 0
fsubs %f1, %f16, %f1 fsubs %f1, %f16, %f1
fcmps %fcc2, %f1, %f0 siam (0 << 2)
movwtos %o3, %f9
fmovsuge %fcc2, ZERO, %f9
fadds %f1, %f9, %f0
fabss %f0, %f0
retl retl
fors %f0, SIGN_BIT, %f0 fors %f1, SIGN_BIT, %f0
END (__ceilf_vis3) END (__ceilf_vis3)

View File

@ -1,6 +1,6 @@
#include <sparc-ifunc.h> #include <sparc-ifunc.h>
SPARC_ASM_VIS3_IFUNC(ceilf) SPARC_ASM_VIS3_VIS2_IFUNC(ceilf)
weak_alias (__ceilf, ceilf) weak_alias (__ceilf, ceilf)

View File

@ -0,0 +1,61 @@
/* floor function, sparc32 v9 vis2 version.
Copyright (C) 2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller <davem@davemloft.net>, 2013.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
the rounding mode during this routine.
We add 2**52 to the value (or subtract it, if the initial
value was negative), then undo that operation to take it
back out.
This will clear out the fractional portion of the value and,
with suitable 'siam' initiated rounding mode settings, round
the final result in the proper direction. */
#define TWO_FIFTYTWO 0x43300000 /* 2**52 */
#define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */
/* floor for double, sparc32 v9 with VIS2.
   The argument arrives as two 32-bit words in %o0 (upper) and %o1
   (lower); the result is left in %f0.  Rounding is done by adding and
   then subtracting a sign-matched 2**52 while 'siam' overrides the
   rounding mode.  The stack slot at %sp + 72 is used as scratch.  */
ENTRY (__floor_vis2)
/* %o2 = 0x43300000, the upper word of the double 2**52 (the low
   10 bits of the constant are zero, so sethi supplies all of it).  */
sethi %hi(TWO_FIFTYTWO), %o2
/* Merge %o0 (moved to the upper half) and %o1 into one 64-bit value.  */
sllx %o0, 32, %o0
or %o0, %o1, %o0
/* Bounce the argument through the stack to get it into %f0.  */
stx %o0, [%sp + 72]
/* Move the constant to the upper half: %o2 is now the bit pattern
   of the double 2**52.  */
sllx %o2, 32, %o2
fzero ZERO
ldd [%sp + 72], %f0
/* SIGN_BIT = -0.0, i.e. only the sign bit set.  */
fnegd ZERO, SIGN_BIT
/* Reuse the same stack slot to move 2**52 into %f16.  */
stx %o2, [%sp + 72]
/* %f14 = |x|.  */
fabsd %f0, %f14
ldd [%sp + 72], %f16
/* If |x| >= 2**52, or the compare is unordered, replace the constant
   with 0.0 so the add/subtract below do not disturb a value that has
   no fractional bits (or a NaN).  */
fcmpd %fcc3, %f14, %f16
fmovduge %fcc3, ZERO, %f16
/* SIGN_BIT = sign bit of x; then give the constant the sign of x.  */
fand %f0, SIGN_BIT, SIGN_BIT
for %f16, SIGN_BIT, %f16
/* NOTE(review): the siam operand presumably encodes an enable bit
   (1 << 2) plus a 2-bit rounding direction; 3 is presumably "toward
   -infinity" and 0 "to nearest" -- confirm against the VIS2 manual.  */
siam (1 << 2) | 3
faddd %f0, %f16, %f18
siam (1 << 2) | 0
fsubd %f18, %f16, %f18
/* Operand 0: drop the rounding mode override again.  */
siam (0 << 2)
retl
/* Delay slot of retl: OR the sign of x back in and place the result
   in %f0.  */
for %f18, SIGN_BIT, %f0
END (__floor_vis2)

View File

@ -19,27 +19,21 @@
#include <sysdep.h> #include <sysdep.h>
/* Since changing the rounding mode is extremely expensive, we /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
try to round up using a method that is rounding mode the rounding mode during this routine.
agnostic.
We add then subtract (or subtract than add if the initial We add then subtract (or subtract than add if the initial
value was negative) 2**23 to the value, then subtract it value was negative) 2**23 to the value, then subtract it
back out. back out.
This will clear out the fractional portion of the value. This will clear out the fractional portion of the value and,
One of two things will happen for non-whole initial values. with suitable 'siam' initiated rouding mode settings, round
Either the rounding mode will round it up, or it will be the final result in the proper direction.
rounded down. If the value started out whole, it will be
equal after the addition and subtraction. This means we
can accurately detect with one test whether we need to add
another 1.0 to round it up properly.
VIS instructions are used to facilitate the formation of We also use VIS3 moves to avoid using the stack to transfer
easier constants, and the propagation of the sign bit. */ values between float and integer registers. */
#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ #define TWO_FIFTYTWO 0x43300000 /* 2**52 */
#define ONE_DOT_ZERO 0x3ff00000 /* 1.0 */
#define ZERO %f10 /* 0.0 */ #define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */ #define SIGN_BIT %f12 /* -0.0 */
@ -47,32 +41,22 @@
ENTRY (__floor_vis3) ENTRY (__floor_vis3)
sethi %hi(TWO_FIFTYTWO), %o2 sethi %hi(TWO_FIFTYTWO), %o2
sllx %o0, 32, %o0 sllx %o0, 32, %o0
sethi %hi(ONE_DOT_ZERO), %o3 sllx %o2, 32, %o2
or %o0, %o1, %o0 or %o0, %o1, %o0
movxtod %o0, %f0 movxtod %o0, %f0
sllx %o2, 32, %o2
fzero ZERO fzero ZERO
sllx %o3, 32, %o3
fnegd ZERO, SIGN_BIT fnegd ZERO, SIGN_BIT
movxtod %o2, %f16 movxtod %o2, %f16
fabsd %f0, %f14 fabsd %f0, %f14
fcmpd %fcc3, %f14, %f16 fcmpd %fcc3, %f14, %f16
fmovduge %fcc3, ZERO, %f16 fmovduge %fcc3, ZERO, %f16
fand %f0, SIGN_BIT, SIGN_BIT fand %f0, SIGN_BIT, SIGN_BIT
for %f16, SIGN_BIT, %f16 for %f16, SIGN_BIT, %f16
siam (1 << 2) | 3
faddd %f0, %f16, %f18 faddd %f0, %f16, %f18
siam (1 << 2) | 0
fsubd %f18, %f16, %f18 fsubd %f18, %f16, %f18
fcmpd %fcc2, %f18, %f0 siam (0 << 2)
movxtod %o3, %f20
fmovdule %fcc2, ZERO, %f20
fsubd %f18, %f20, %f0
fabsd %f0, %f0
retl retl
for %f0, SIGN_BIT, %f0 for %f18, SIGN_BIT, %f0
END (__floor_vis3) END (__floor_vis3)

View File

@ -1,7 +1,7 @@
#include <sparc-ifunc.h> #include <sparc-ifunc.h>
#include <math_ldbl_opt.h> #include <math_ldbl_opt.h>
SPARC_ASM_VIS3_IFUNC(floor) SPARC_ASM_VIS3_VIS2_IFUNC(floor)
weak_alias (__floor, floor) weak_alias (__floor, floor)

View File

@ -0,0 +1,58 @@
/* Float floor function, sparc32 v9 vis2 version.
Copyright (C) 2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller <davem@davemloft.net>, 2013.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
the rounding mode during this routine.
We add 2**23 to the value (or subtract it, if the initial
value was negative), then undo that operation to take it
back out.
This will clear out the fractional portion of the value and,
with suitable 'siam' initiated rounding mode settings, round
the final result in the proper direction. */
#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */
#define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */
/* floor for float, sparc32 v9 with VIS2.
   The argument arrives in %o0; the result is left in %f0.  Rounding
   is done by adding and then subtracting a sign-matched 2**23 while
   'siam' overrides the rounding mode.  The stack slot at %sp + 68 is
   used as scratch.  */
ENTRY (__floorf_vis2)
/* Bounce the argument through the stack to get it into %f0.  */
st %o0, [%sp + 68]
/* %o2 = 0x4b000000, the bit pattern of the float 2**23 (the low
   10 bits of the constant are zero, so sethi supplies all of it).  */
sethi %hi(TWO_TWENTYTHREE), %o2
fzeros ZERO
ld [%sp + 68], %f0
/* SIGN_BIT = -0.0, i.e. only the sign bit set.  */
fnegs ZERO, SIGN_BIT
/* Reuse the same stack slot to move 2**23 into %f16.  */
st %o2, [%sp + 68]
/* %f14 = |x|.  */
fabss %f0, %f14
ld [%sp + 68], %f16
/* If |x| >= 2**23, or the compare is unordered, replace the constant
   with 0.0 so the add/subtract below do not disturb a value that has
   no fractional bits (or a NaN).  */
fcmps %fcc3, %f14, %f16
fmovsuge %fcc3, ZERO, %f16
/* SIGN_BIT = sign bit of x; then give the constant the sign of x.  */
fands %f0, SIGN_BIT, SIGN_BIT
fors %f16, SIGN_BIT, %f16
/* NOTE(review): the siam operand presumably encodes an enable bit
   (1 << 2) plus a 2-bit rounding direction; 3 is presumably "toward
   -infinity" and 0 "to nearest" -- confirm against the VIS2 manual.  */
siam (1 << 2) | 3
fadds %f0, %f16, %f1
siam (1 << 2) | 0
fsubs %f1, %f16, %f1
/* Operand 0: drop the rounding mode override again.  */
siam (0 << 2)
retl
/* Delay slot of retl: OR the sign of x back in and place the result
   in %f0.  */
fors %f1, SIGN_BIT, %f0
END (__floorf_vis2)

View File

@ -19,27 +19,21 @@
#include <sysdep.h> #include <sysdep.h>
/* Since changing the rounding mode is extremely expensive, we /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
try to round up using a method that is rounding mode the rounding mode during this routine.
agnostic.
We add then subtract (or subtract than add if the initial We add then subtract (or subtract than add if the initial
value was negative) 2**23 to the value, then subtract it value was negative) 2**23 to the value, then subtract it
back out. back out.
This will clear out the fractional portion of the value. This will clear out the fractional portion of the value and,
One of two things will happen for non-whole initial values. with suitable 'siam' initiated rouding mode settings, round
Either the rounding mode will round it up, or it will be the final result in the proper direction.
rounded down. If the value started out whole, it will be
equal after the addition and subtraction. This means we
can accurately detect with one test whether we need to add
another 1.0 to round it up properly.
VIS instructions are used to facilitate the formation of We also use VIS3 moves to avoid using the stack to transfer
easier constants, and the propagation of the sign bit. */ values between float and integer registers. */
#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ #define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */
#define ONE_DOT_ZERO 0x3f800000 /* 1.0 */
#define ZERO %f10 /* 0.0 */ #define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */ #define SIGN_BIT %f12 /* -0.0 */
@ -47,28 +41,19 @@
ENTRY (__floorf_vis3) ENTRY (__floorf_vis3)
movwtos %o0, %f0 movwtos %o0, %f0
sethi %hi(TWO_TWENTYTHREE), %o2 sethi %hi(TWO_TWENTYTHREE), %o2
sethi %hi(ONE_DOT_ZERO), %o3
fzeros ZERO fzeros ZERO
fnegs ZERO, SIGN_BIT fnegs ZERO, SIGN_BIT
movwtos %o2, %f16 movwtos %o2, %f16
fabss %f0, %f14 fabss %f0, %f14
fcmps %fcc3, %f14, %f16 fcmps %fcc3, %f14, %f16
fmovsuge %fcc3, ZERO, %f16 fmovsuge %fcc3, ZERO, %f16
fands %f0, SIGN_BIT, SIGN_BIT fands %f0, SIGN_BIT, SIGN_BIT
fors %f16, SIGN_BIT, %f16 fors %f16, SIGN_BIT, %f16
siam (1 << 2) | 3
fadds %f0, %f16, %f1 fadds %f0, %f16, %f1
siam (1 << 2) | 0
fsubs %f1, %f16, %f1 fsubs %f1, %f16, %f1
fcmps %fcc2, %f1, %f0 siam (0 << 2)
movwtos %o3, %f9
fmovsule %fcc2, ZERO, %f9
fsubs %f1, %f9, %f0
fabss %f0, %f0
retl retl
fors %f0, SIGN_BIT, %f0 fors %f1, SIGN_BIT, %f0
END (__floorf_vis3) END (__floorf_vis3)

View File

@ -1,6 +1,6 @@
#include <sparc-ifunc.h> #include <sparc-ifunc.h>
SPARC_ASM_VIS3_IFUNC(floorf) SPARC_ASM_VIS3_VIS2_IFUNC(floorf)
weak_alias (__floorf, floorf) weak_alias (__floorf, floorf)

View File

@ -1,4 +1,6 @@
ifeq ($(subdir),math) ifeq ($(subdir),math)
libm-sysdep_routines += s_ceil-vis2 s_ceilf-vis2 \
s_floor-vis2 s_floorf-vis2
ifeq ($(have-as-vis3),yes) ifeq ($(have-as-vis3),yes)
libm-sysdep_routines += m_signbitf-vis3 m_signbit-vis3 s_ceilf-vis3 \ libm-sysdep_routines += m_signbitf-vis3 m_signbit-vis3 s_ceilf-vis3 \
s_ceil-vis3 m_finitef-vis3 m_finite-vis3 \ s_ceil-vis3 m_finitef-vis3 m_finite-vis3 \

View File

@ -0,0 +1,57 @@
/* ceil function, sparc64 vis2 version.
Copyright (C) 2012-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller <davem@davemloft.net>, 2012.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
the rounding mode during this routine.
We add 2**52 to the value (or subtract it, if the initial
value was negative), then undo that operation to take it
back out.
This will clear out the fractional portion of the value and,
with suitable 'siam' initiated rounding mode settings, round
the final result in the proper direction. */
#define TWO_FIFTYTWO 0x43300000 /* 2**52 */
#define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */
/* ceil for double, sparc64 with VIS2.
   The argument is read from %f0 and the result is left in %f0.
   Rounding is done by adding and then subtracting a sign-matched
   2**52 while 'siam' overrides the rounding mode.  The stack slot at
   %sp + STACK_BIAS + 128 is used as scratch to move the constant
   from an integer register to a floating-point register.  */
ENTRY (__ceil_vis2)
/* %o2 = 0x43300000, the upper word of the double 2**52 (the low
   10 bits of the constant are zero, so sethi supplies all of it).  */
sethi %hi(TWO_FIFTYTWO), %o2
fzero ZERO
/* Move the constant to the upper half: %o2 is now the bit pattern
   of the double 2**52.  */
sllx %o2, 32, %o2
/* SIGN_BIT = -0.0, i.e. only the sign bit set.  */
fnegd ZERO, SIGN_BIT
/* Bounce 2**52 through the stack into %f16.  */
stx %o2, [%sp + STACK_BIAS + 128]
/* %f14 = |x|.  */
fabsd %f0, %f14
ldd [%sp + STACK_BIAS + 128], %f16
/* If |x| >= 2**52, or the compare is unordered, replace the constant
   with 0.0 so the add/subtract below do not disturb a value that has
   no fractional bits (or a NaN).  */
fcmpd %fcc3, %f14, %f16
fmovduge %fcc3, ZERO, %f16
/* SIGN_BIT = sign bit of x; then give the constant the sign of x.  */
fand %f0, SIGN_BIT, SIGN_BIT
for %f16, SIGN_BIT, %f16
/* NOTE(review): the siam operand presumably encodes an enable bit
   (1 << 2) plus a 2-bit rounding direction; 2 is presumably "toward
   +infinity" and 0 "to nearest" -- confirm against the VIS2 manual.  */
siam (1 << 2) | 2
faddd %f0, %f16, %f18
siam (1 << 2) | 0
fsubd %f18, %f16, %f18
/* Operand 0: drop the rounding mode override again.  */
siam (0 << 2)
retl
/* Delay slot of retl: OR the sign of x back in and place the result
   in %f0.  */
for %f18, SIGN_BIT, %f0
END (__ceil_vis2)

View File

@ -19,57 +19,41 @@
#include <sysdep.h> #include <sysdep.h>
/* Since changing the rounding mode is extremely expensive, we /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
try to round up using a method that is rounding mode the rounding mode during this routine.
agnostic.
We add then subtract (or subtract than add if the initial We add then subtract (or subtract than add if the initial
value was negative) 2**23 to the value, then subtract it value was negative) 2**23 to the value, then subtract it
back out. back out.
This will clear out the fractional portion of the value. This will clear out the fractional portion of the value and,
One of two things will happen for non-whole initial values. with suitable 'siam' initiated rouding mode settings, round
Either the rounding mode will round it up, or it will be the final result in the proper direction.
rounded down. If the value started out whole, it will be
equal after the addition and subtraction. This means we
can accurately detect with one test whether we need to add
another 1.0 to round it up properly.
VIS instructions are used to facilitate the formation of We also use VIS3 moves to avoid using the stack to transfer
easier constants, and the propagation of the sign bit. */ values between float and integer registers. */
#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ #define TWO_FIFTYTWO 0x43300000 /* 2**52 */
#define ONE_DOT_ZERO 0x3ff00000 /* 1.0 */
#define ZERO %f10 /* 0.0 */ #define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */ #define SIGN_BIT %f12 /* -0.0 */
ENTRY (__ceil_vis3) ENTRY (__ceil_vis3)
sethi %hi(TWO_FIFTYTWO), %o2 sethi %hi(TWO_FIFTYTWO), %o2
sethi %hi(ONE_DOT_ZERO), %o3
fzero ZERO fzero ZERO
sllx %o2, 32, %o2 sllx %o2, 32, %o2
fnegd ZERO, SIGN_BIT fnegd ZERO, SIGN_BIT
sllx %o3, 32, %o3
movxtod %o2, %f16 movxtod %o2, %f16
fabsd %f0, %f14 fabsd %f0, %f14
fcmpd %fcc3, %f14, %f16 fcmpd %fcc3, %f14, %f16
fmovduge %fcc3, ZERO, %f16 fmovduge %fcc3, ZERO, %f16
fand %f0, SIGN_BIT, SIGN_BIT fand %f0, SIGN_BIT, SIGN_BIT
for %f16, SIGN_BIT, %f16 for %f16, SIGN_BIT, %f16
siam (1 << 2) | 2
faddd %f0, %f16, %f18 faddd %f0, %f16, %f18
siam (1 << 2) | 0
fsubd %f18, %f16, %f18 fsubd %f18, %f16, %f18
fcmpd %fcc2, %f18, %f0 siam (0 << 2)
movxtod %o3, %f20
fmovduge %fcc2, ZERO, %f20
faddd %f18, %f20, %f0
fabsd %f0, %f0
retl retl
for %f0, SIGN_BIT, %f0 for %f18, SIGN_BIT, %f0
END (__ceil_vis3) END (__ceil_vis3)

View File

@ -1,6 +1,6 @@
#include <sparc-ifunc.h> #include <sparc-ifunc.h>
SPARC_ASM_VIS3_IFUNC(ceil) SPARC_ASM_VIS3_VIS2_IFUNC(ceil)
weak_alias (__ceil, ceil) weak_alias (__ceil, ceil)

View File

@ -0,0 +1,56 @@
/* Float ceil function, sparc64 vis2 version.
Copyright (C) 2012-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller <davem@davemloft.net>, 2012.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
the rounding mode during this routine.
We add 2**23 to the value (or subtract it, if the initial
value was negative), then undo that operation to take it
back out.
This will clear out the fractional portion of the value and,
with suitable 'siam' initiated rounding mode settings, round
the final result in the proper direction. */
#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */
#define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */
/* __ceilf_vis2: reads the single-precision argument x from %f1 and
   leaves the result in %f0.  Scratch: %o2, %f5, %f14, %f16, ZERO,
   SIGN_BIT, %fcc3 and one word at [%sp + STACK_BIAS + 128].  */
ENTRY (__ceilf_vis2)
/* %o2 = bit pattern of 2**23 (the low bits of the constant are zero,
   so sethi alone builds it).  */
sethi %hi(TWO_TWENTYTHREE), %o2
/* ZERO = 0.0, SIGN_BIT = -0.0 (only the sign bit set).  */
fzeros ZERO
fnegs ZERO, SIGN_BIT
/* Move 2**23 from the integer register into %f16 through a stack
   slot; %f14 = |x| is computed in between.  */
st %o2, [%sp + STACK_BIAS + 128]
fabss %f1, %f14
ld [%sp + STACK_BIAS + 128], %f16
/* If |x| >= 2**23, or the compare is unordered, use 0.0 instead of
   2**23 so that the add/subtract below leaves x unchanged.  */
fcmps %fcc3, %f14, %f16
fmovsuge %fcc3, ZERO, %f16
/* From here on SIGN_BIT holds just the sign bit of x, and %f16 is
   the constant carrying the same sign as x.  */
fands %f1, SIGN_BIT, SIGN_BIT
fors %f16, SIGN_BIT, %f16
/* Enable the 'siam' rounding override with mode 2 for the add.
   NOTE(review): mode 2 should be round-toward-+infinity, as in the
   FSR.RD encoding -- confirm against the VIS2 manual.  */
siam (1 << 2) | 2
fadds %f1, %f16, %f5
/* Switch the override to mode 0 for the subtract that removes the
   constant again.  */
siam (1 << 2) | 0
fsubs %f5, %f16, %f5
/* Turn the rounding override off before returning.  */
siam (0 << 2)
retl
/* Delay slot of retl: put the sign of x back on the result while
   moving it to %f0.  */
fors %f5, SIGN_BIT, %f0
END (__ceilf_vis2)

View File

@ -19,55 +19,40 @@
#include <sysdep.h> #include <sysdep.h>
/* Since changing the rounding mode is extremely expensive, we /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
try to round up using a method that is rounding mode the rounding mode during this routine.
agnostic.
We add then subtract (or subtract then add if the initial We add then subtract (or subtract then add if the initial
value was negative) 2**23 to the value, then subtract it value was negative) 2**23 to the value, then subtract it
back out. back out.
This will clear out the fractional portion of the value. This will clear out the fractional portion of the value and,
One of two things will happen for non-whole initial values. with suitable 'siam' initiated rounding mode settings, round
Either the rounding mode will round it up, or it will be the final result in the proper direction.
rounded down. If the value started out whole, it will be
equal after the addition and subtraction. This means we
can accurately detect with one test whether we need to add
another 1.0 to round it up properly.
VIS instructions are used to facilitate the formation of We also use VIS3 moves to avoid using the stack to transfer
easier constants, and the propagation of the sign bit. */ values between float and integer registers. */
#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ #define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */
#define ONE_DOT_ZERO 0x3f800000 /* 1.0 */
#define ZERO %f10 /* 0.0 */ #define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */ #define SIGN_BIT %f12 /* -0.0 */
ENTRY (__ceilf_vis3) ENTRY (__ceilf_vis3)
sethi %hi(TWO_TWENTYTHREE), %o2 sethi %hi(TWO_TWENTYTHREE), %o2
sethi %hi(ONE_DOT_ZERO), %o3
fzeros ZERO fzeros ZERO
fnegs ZERO, SIGN_BIT fnegs ZERO, SIGN_BIT
movwtos %o2, %f16 movwtos %o2, %f16
fabss %f1, %f14 fabss %f1, %f14
fcmps %fcc3, %f14, %f16 fcmps %fcc3, %f14, %f16
fmovsuge %fcc3, ZERO, %f16 fmovsuge %fcc3, ZERO, %f16
fands %f1, SIGN_BIT, SIGN_BIT fands %f1, SIGN_BIT, SIGN_BIT
fors %f16, SIGN_BIT, %f16 fors %f16, SIGN_BIT, %f16
siam (1 << 2) | 2
fadds %f1, %f16, %f5 fadds %f1, %f16, %f5
siam (1 << 2) | 0
fsubs %f5, %f16, %f5 fsubs %f5, %f16, %f5
fcmps %fcc2, %f5, %f1 siam (0 << 2)
movwtos %o3, %f9
fmovsuge %fcc2, ZERO, %f9
fadds %f5, %f9, %f0
fabss %f0, %f0
retl retl
fors %f0, SIGN_BIT, %f0 fors %f5, SIGN_BIT, %f0
END (__ceilf_vis3) END (__ceilf_vis3)

View File

@ -1,6 +1,6 @@
#include <sparc-ifunc.h> #include <sparc-ifunc.h>
SPARC_ASM_VIS3_IFUNC(ceilf) SPARC_ASM_VIS3_VIS2_IFUNC(ceilf)
weak_alias (__ceilf, ceilf) weak_alias (__ceilf, ceilf)

View File

@ -0,0 +1,57 @@
/* floor function, sparc64 vis2 version.
Copyright (C) 2012-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller <davem@davemloft.net>, 2012.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
the rounding mode during this routine.
We add 2**52 to the value and then subtract it back out (or, if
the initial value was negative, subtract 2**52 and then add it
back).
This will clear out the fractional portion of the value and,
with suitable 'siam' initiated rounding mode settings, round
the final result in the proper direction. */
#define TWO_FIFTYTWO 0x43300000 /* 2**52 */
#define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */
/* __floor_vis2: reads the double-precision argument x from %f0 and
   leaves the result in %f0.  Scratch: %o2, %f14, %f16, %f18, ZERO,
   SIGN_BIT, %fcc3 and eight bytes at [%sp + STACK_BIAS + 128].  */
ENTRY (__floor_vis2)
/* Build the bit pattern of 2**52 in %o2: TWO_FIFTYTWO is the upper
   32-bit word, so it is shifted into the top half of the register.
   ZERO = 0.0 and SIGN_BIT = -0.0 are set up in between.  */
sethi %hi(TWO_FIFTYTWO), %o2
fzero ZERO
sllx %o2, 32, %o2
fnegd ZERO, SIGN_BIT
/* Move 2**52 from the integer register into %f16 through a stack
   slot; %f14 = |x| is computed in between.  */
stx %o2, [%sp + STACK_BIAS + 128]
fabsd %f0, %f14
ldd [%sp + STACK_BIAS + 128], %f16
/* If |x| >= 2**52, or the compare is unordered, use 0.0 instead of
   2**52 so that the add/subtract below leaves x unchanged.  */
fcmpd %fcc3, %f14, %f16
fmovduge %fcc3, ZERO, %f16
/* From here on SIGN_BIT holds just the sign bit of x, and %f16 is
   the constant carrying the same sign as x.  */
fand %f0, SIGN_BIT, SIGN_BIT
for %f16, SIGN_BIT, %f16
/* Enable the 'siam' rounding override with mode 3 for the add.
   NOTE(review): mode 3 should be round-toward--infinity, as in the
   FSR.RD encoding -- confirm against the VIS2 manual.  */
siam (1 << 2) | 3
faddd %f0, %f16, %f18
/* Switch the override to mode 0 for the subtract that removes the
   constant again.  */
siam (1 << 2) | 0
fsubd %f18, %f16, %f18
/* Turn the rounding override off before returning.  */
siam (0 << 2)
retl
/* Delay slot of retl: put the sign of x back on the result while
   moving it to %f0.  */
for %f18, SIGN_BIT, %f0
END (__floor_vis2)

View File

@ -19,57 +19,41 @@
#include <sysdep.h> #include <sysdep.h>
/* Since changing the rounding mode is extremely expensive, we /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
try to round up using a method that is rounding mode the rounding mode during this routine.
agnostic.
We add then subtract (or subtract then add if the initial We add then subtract (or subtract then add if the initial
value was negative) 2**52 to the value, then subtract it value was negative) 2**52 to the value, then subtract it
back out. back out.
This will clear out the fractional portion of the value. This will clear out the fractional portion of the value and,
One of two things will happen for non-whole initial values. with suitable 'siam' initiated rounding mode settings, round
Either the rounding mode will round it up, or it will be the final result in the proper direction.
rounded down. If the value started out whole, it will be
equal after the addition and subtraction. This means we
can accurately detect with one test whether we need to add
another 1.0 to round it up properly.
VIS instructions are used to facilitate the formation of We also use VIS3 moves to avoid using the stack to transfer
easier constants, and the propagation of the sign bit. */ values between float and integer registers. */
#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ #define TWO_FIFTYTWO 0x43300000 /* 2**52 */
#define ONE_DOT_ZERO 0x3ff00000 /* 1.0 */
#define ZERO %f10 /* 0.0 */ #define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */ #define SIGN_BIT %f12 /* -0.0 */
ENTRY (__floor_vis3) ENTRY (__floor_vis3)
sethi %hi(TWO_FIFTYTWO), %o2 sethi %hi(TWO_FIFTYTWO), %o2
sethi %hi(ONE_DOT_ZERO), %o3
fzero ZERO fzero ZERO
sllx %o2, 32, %o2 sllx %o2, 32, %o2
fnegd ZERO, SIGN_BIT fnegd ZERO, SIGN_BIT
sllx %o3, 32, %o3
movxtod %o2, %f16 movxtod %o2, %f16
fabsd %f0, %f14 fabsd %f0, %f14
fcmpd %fcc3, %f14, %f16 fcmpd %fcc3, %f14, %f16
fmovduge %fcc3, ZERO, %f16 fmovduge %fcc3, ZERO, %f16
fand %f0, SIGN_BIT, SIGN_BIT fand %f0, SIGN_BIT, SIGN_BIT
for %f16, SIGN_BIT, %f16 for %f16, SIGN_BIT, %f16
siam (1 << 2) | 3
faddd %f0, %f16, %f18 faddd %f0, %f16, %f18
siam (1 << 2) | 0
fsubd %f18, %f16, %f18 fsubd %f18, %f16, %f18
fcmpd %fcc2, %f18, %f0 siam (0 << 2)
movxtod %o3, %f20
fmovdule %fcc2, ZERO, %f20
fsubd %f18, %f20, %f0
fabsd %f0, %f0
retl retl
for %f0, SIGN_BIT, %f0 for %f18, SIGN_BIT, %f0
END (__floor_vis3) END (__floor_vis3)

View File

@ -1,6 +1,6 @@
#include <sparc-ifunc.h> #include <sparc-ifunc.h>
SPARC_ASM_VIS3_IFUNC(floor) SPARC_ASM_VIS3_VIS2_IFUNC(floor)
weak_alias (__floor, floor) weak_alias (__floor, floor)

View File

@ -0,0 +1,56 @@
/* Float floor function, sparc64 vis2 version.
Copyright (C) 2012-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller <davem@davemloft.net>, 2012.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
the rounding mode during this routine.
We add 2**23 to the value and then subtract it back out (or, if
the initial value was negative, subtract 2**23 and then add it
back).
This will clear out the fractional portion of the value and,
with suitable 'siam' initiated rounding mode settings, round
the final result in the proper direction. */
#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */
#define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */
/* __floorf_vis2: reads the single-precision argument x from %f1 and
   leaves the result in %f0.  Scratch: %o2, %f5, %f14, %f16, ZERO,
   SIGN_BIT, %fcc3 and one word at [%sp + STACK_BIAS + 128].  */
ENTRY (__floorf_vis2)
/* %o2 = bit pattern of 2**23 (the low bits of the constant are zero,
   so sethi alone builds it).  */
sethi %hi(TWO_TWENTYTHREE), %o2
/* ZERO = 0.0, SIGN_BIT = -0.0 (only the sign bit set).  */
fzeros ZERO
fnegs ZERO, SIGN_BIT
/* Move 2**23 from the integer register into %f16 through a stack
   slot; %f14 = |x| is computed in between.  */
st %o2, [%sp + STACK_BIAS + 128]
fabss %f1, %f14
ld [%sp + STACK_BIAS + 128], %f16
/* If |x| >= 2**23, or the compare is unordered, use 0.0 instead of
   2**23 so that the add/subtract below leaves x unchanged.  */
fcmps %fcc3, %f14, %f16
fmovsuge %fcc3, ZERO, %f16
/* From here on SIGN_BIT holds just the sign bit of x, and %f16 is
   the constant carrying the same sign as x.  */
fands %f1, SIGN_BIT, SIGN_BIT
fors %f16, SIGN_BIT, %f16
/* Enable the 'siam' rounding override with mode 3 for the add.
   NOTE(review): mode 3 should be round-toward--infinity, as in the
   FSR.RD encoding -- confirm against the VIS2 manual.  */
siam (1 << 2) | 3
fadds %f1, %f16, %f5
/* Switch the override to mode 0 for the subtract that removes the
   constant again.  */
siam (1 << 2) | 0
fsubs %f5, %f16, %f5
/* Turn the rounding override off before returning.  */
siam (0 << 2)
retl
/* Delay slot of retl: put the sign of x back on the result while
   moving it to %f0.  */
fors %f5, SIGN_BIT, %f0
END (__floorf_vis2)

View File

@ -19,55 +19,40 @@
#include <sysdep.h> #include <sysdep.h>
/* Since changing the rounding mode is extremely expensive, we /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
try to round up using a method that is rounding mode the rounding mode during this routine.
agnostic.
We add then subtract (or subtract then add if the initial We add then subtract (or subtract then add if the initial
value was negative) 2**23 to the value, then subtract it value was negative) 2**23 to the value, then subtract it
back out. back out.
This will clear out the fractional portion of the value. This will clear out the fractional portion of the value and,
One of two things will happen for non-whole initial values. with suitable 'siam' initiated rounding mode settings, round
Either the rounding mode will round it up, or it will be the final result in the proper direction.
rounded down. If the value started out whole, it will be
equal after the addition and subtraction. This means we
can accurately detect with one test whether we need to add
another 1.0 to round it up properly.
VIS instructions are used to facilitate the formation of We also use VIS3 moves to avoid using the stack to transfer
easier constants, and the propagation of the sign bit. */ values between float and integer registers. */
#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ #define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */
#define ONE_DOT_ZERO 0x3f800000 /* 1.0 */
#define ZERO %f10 /* 0.0 */ #define ZERO %f10 /* 0.0 */
#define SIGN_BIT %f12 /* -0.0 */ #define SIGN_BIT %f12 /* -0.0 */
ENTRY (__floorf_vis3) ENTRY (__floorf_vis3)
sethi %hi(TWO_TWENTYTHREE), %o2 sethi %hi(TWO_TWENTYTHREE), %o2
sethi %hi(ONE_DOT_ZERO), %o3
fzeros ZERO fzeros ZERO
fnegs ZERO, SIGN_BIT fnegs ZERO, SIGN_BIT
movwtos %o2, %f16
movwtos %o2, %f16
fabss %f1, %f14 fabss %f1, %f14
fcmps %fcc3, %f14, %f16 fcmps %fcc3, %f14, %f16
fmovsuge %fcc3, ZERO, %f16 fmovsuge %fcc3, ZERO, %f16
fands %f1, SIGN_BIT, SIGN_BIT fands %f1, SIGN_BIT, SIGN_BIT
fors %f16, SIGN_BIT, %f16 fors %f16, SIGN_BIT, %f16
siam (1 << 2) | 3
fadds %f1, %f16, %f5 fadds %f1, %f16, %f5
siam (1 << 2) | 0
fsubs %f5, %f16, %f5 fsubs %f5, %f16, %f5
fcmps %fcc2, %f5, %f1 siam (0 << 2)
movwtos %o3, %f9
fmovsule %fcc2, ZERO, %f9
fsubs %f5, %f9, %f0
fabss %f0, %f0
retl retl
fors %f0, SIGN_BIT, %f0 fors %f5, SIGN_BIT, %f0
END (__floorf_vis3) END (__floorf_vis3)

View File

@ -1,6 +1,6 @@
#include <sparc-ifunc.h> #include <sparc-ifunc.h>
SPARC_ASM_VIS3_IFUNC(floorf) SPARC_ASM_VIS3_VIS2_IFUNC(floorf)
weak_alias (__floorf, floorf) weak_alias (__floorf, floorf)