Optimized sparc floor{,f} routines.

* sysdeps/sparc/sparc32/sparcv9/fpu/s_floor.S: New file. * sysdeps/sparc/sparc32/sparcv9/fpu/s_floorf.S: New file. * sysdeps/sparc/sparc64/fpu/s_floor.S: New file. * sysdeps/sparc/sparc64/fpu/s_floorf.S: New file.
2025-01-09 10:50:08 +00:00 · 2012-02-27 20:36:30 -08:00 · 2012-02-27 20:36:30 -08:00 · 1aff59a3f7
commit 1aff59a3f7
parent d6b5aa5879
5 changed files with 339 additions and 0 deletions
--- a/5
+++ b/5
@ -1,5 +1,10 @@
 2012-02-27  David S. Miller  <davem@davemloft.net>
 	* sysdeps/sparc/sparc32/sparcv9/fpu/s_floor.S: New file.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/s_floorf.S: New file.
 	* sysdeps/sparc/sparc64/fpu/s_floor.S: New file.
 	* sysdeps/sparc/sparc64/fpu/s_floorf.S: New file.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/s_ceil.S: Fix accidental use of
 	frame pointer instead of stack pointer relative arg slot.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/s_ceilf.S: Likewise.
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/s_floor.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/s_floor.S
@ -0,0 +1,87 @@
 /* floor function, sparc32 v9 version.
   Copyright (C) 2012 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2012.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 #include <sysdep.h>
 	/* Since changing the rounding mode is extremely expensive, we
 	   try to round down using a method that is rounding mode
 	   agnostic.
 	   We add 2**52 to the value (or subtract it if the initial
 	   value was negative), then take it back out again.
 	   This will clear out the fractional portion of the value.
 	   One of two things will happen for non-whole initial values.
 	   Either the rounding mode will round it up, or it will be
 	   rounded down.  If the value started out whole, it will be
 	   equal after the addition and subtraction.  This means we
 	   can accurately detect with one test (is the rounded value
 	   greater than the original?) whether we need to subtract
 	   another 1.0 to round it down properly.
 	   We pop constants into the FPU registers using the incoming
 	   argument stack slots, since this avoids having to use any
 	   PIC references.  We also thus avoid having to allocate a
 	   register window.
 	   VIS instructions are used to facilitate the formation of
 	   easier constants, and the propagation of the sign bit.  */
 /* TWO_FIFTYTWO and ONE_DOT_ZERO are only the upper 32 bits of the
    double bit patterns; the code shifts them left by 32 to build the
    full 64-bit constants.  ZERO and SIGN_BIT name FP registers that
    the routine initializes itself.  */
 #define TWO_FIFTYTWO	0x43300000		/* 2**52 */
 #define ONE_DOT_ZERO	0x3ff00000		/* 1.0 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 /* double __floor (double x), sparc32 v9.

    In:   x as the %o0 (high word) / %o1 (low word) pair.
    Out:  result in %f0.
    Leaf routine: returns with retl and allocates no register window.
    The 8-byte slot at [%sp + 72] is used as scratch to move 64-bit
    values from integer registers into FP registers; it is reused
    three times, so the store/load ordering below matters.
    Writes %o0, %o2, %o3, %f0, %f10, %f12, %f14, %f16, %f18, %f20
    (as doubles), %fcc2 and %fcc3.  */
 ENTRY (__floor)
 	/* Glue the two argument halves into one 64-bit value, spill it
 	   to the scratch slot, and meanwhile start forming 2**52 in %o2
 	   and 1.0 in %o3.  */
 	sethi	%hi(TWO_FIFTYTWO), %o2
 	sllx	%o0, 32, %o0
 	sethi	%hi(ONE_DOT_ZERO), %o3
 	or	%o0, %o1, %o0
 	stx	%o0, [%sp + 72]
 	sllx	%o2, 32, %o2
 	fzero	ZERO
 	sllx	%o3, 32, %o3
 	/* %f0 = x; SIGN_BIT = -0.0, i.e. only the sign bit set.  */
 	ldd	[%sp + 72], %f0
 	fnegd	ZERO, SIGN_BIT
 	/* %f14 = |x|, %f16 = 2**52 (through the scratch slot).  */
 	stx	%o2, [%sp + 72]
 	fabsd	%f0, %f14
 	ldd	[%sp + 72], %f16
 	/* If |x| >= 2**52, or the compare is unordered (NaN), use 0.0 in
 	   place of 2**52 so the add/subtract below leaves x unchanged.  */
 	fcmpd	%fcc3, %f14, %f16
 	fmovduge %fcc3, ZERO, %f16
 	/* SIGN_BIT = sign bit of x; copy that sign onto the 2**52 bias.  */
 	fand	%f0, SIGN_BIT, SIGN_BIT
 	for	%f16, SIGN_BIT, %f16
 	/* %f18 = (x + bias) - bias: x rounded to an integer in whatever
 	   rounding mode is currently active.  */
 	faddd	%f0, %f16, %f18
 	fsubd	%f18, %f16, %f18
 	/* Compare the rounded value with x, and load 1.0 into %f20 as the
 	   candidate correction.  */
 	fcmpd	%fcc2, %f18, %f0
 	stx	%o3, [%sp + 72]
 	ldd	[%sp + 72], %f20
 	/* Rounded value <= x (or unordered): no correction, use 0.0.
 	   Otherwise it was rounded upward, so 1.0 is subtracted.  */
 	fmovdule %fcc2, ZERO, %f20
 	fsubd	%f18, %f20, %f0
 	/* Clear the result's sign, then OR the input's sign bit back in
 	   (second half runs in the retl delay slot).  Presumably this
 	   keeps the sign of a zero result equal to the sign of x
 	   regardless of rounding mode -- TODO confirm intent.  */
 	fabsd	%f0, %f0
 	retl
 	 for	%f0, SIGN_BIT, %f0
 END (__floor)
 weak_alias (__floor, floor)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/s_floorf.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/s_floorf.S
@ -0,0 +1,83 @@
 /* Float floor function, sparc32 v9 version.
   Copyright (C) 2012 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2012.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 #include <sysdep.h>
 	/* Since changing the rounding mode is extremely expensive, we
 	   try to round down using a method that is rounding mode
 	   agnostic.
 	   We add 2**23 to the value (or subtract it if the initial
 	   value was negative), then take it back out again.
 	   This will clear out the fractional portion of the value.
 	   One of two things will happen for non-whole initial values.
 	   Either the rounding mode will round it up, or it will be
 	   rounded down.  If the value started out whole, it will be
 	   equal after the addition and subtraction.  This means we
 	   can accurately detect with one test (is the rounded value
 	   greater than the original?) whether we need to subtract
 	   another 1.0 to round it down properly.
 	   We pop constants into the FPU registers using the incoming
 	   argument stack slots, since this avoids having to use any
 	   PIC references.  We also thus avoid having to allocate a
 	   register window.
 	   VIS instructions are used to facilitate the formation of
 	   easier constants, and the propagation of the sign bit.  */
 /* TWO_TWENTYTHREE and ONE_DOT_ZERO are complete 32-bit single
    precision bit patterns.  Their low 10 bits are zero, so a lone
    sethi %hi() materializes the whole value.  ZERO and SIGN_BIT name
    FP registers that the routine initializes itself.  */
 #define TWO_TWENTYTHREE	0x4b000000		/* 2**23 */
 #define ONE_DOT_ZERO	0x3f800000		/* 1.0 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 /* float __floorf (float x), sparc32 v9.

    In:   x as a 32-bit pattern in %o0.
    Out:  result in %f0.
    Leaf routine: returns with retl and allocates no register window.
    The 4-byte slot at [%sp + 68] is used as scratch to move values
    from integer registers into FP registers; it is reused three
    times, so the store/load ordering below matters.
    Writes %o2, %o3, %f0, %f1, %f9, %f10, %f12, %f14, %f16, %fcc2 and
    %fcc3.  */
 ENTRY (__floorf)
 	/* Spill x to the scratch slot and form 2**23 / 1.0 in %o2 / %o3.  */
 	st	%o0, [%sp + 68]
 	sethi	%hi(TWO_TWENTYTHREE), %o2
 	sethi	%hi(ONE_DOT_ZERO), %o3
 	/* ZERO = 0.0, %f0 = x, SIGN_BIT = -0.0 (only the sign bit set).  */
 	fzeros	ZERO
 	ld	[%sp + 68], %f0
 	fnegs	ZERO, SIGN_BIT
 	/* %f14 = |x|, %f16 = 2**23 (through the scratch slot).  */
 	st	%o2, [%sp + 68]
 	fabss	%f0, %f14
 	ld	[%sp + 68], %f16
 	/* If |x| >= 2**23, or the compare is unordered (NaN), use 0.0 in
 	   place of 2**23 so the add/subtract below leaves x unchanged.  */
 	fcmps	%fcc3, %f14, %f16
 	fmovsuge %fcc3, ZERO, %f16
 	/* SIGN_BIT = sign bit of x; copy that sign onto the 2**23 bias.  */
 	fands	%f0, SIGN_BIT, SIGN_BIT
 	fors	%f16, SIGN_BIT, %f16
 	/* %f1 = (x + bias) - bias: x rounded to an integer in whatever
 	   rounding mode is currently active.  */
 	fadds	%f0, %f16, %f1
 	fsubs	%f1, %f16, %f1
 	/* Compare the rounded value with x, and load 1.0 into %f9 as the
 	   candidate correction.  */
 	fcmps	%fcc2, %f1, %f0
 	st	%o3, [%sp + 68]
 	ld	[%sp + 68], %f9
 	/* Rounded value <= x (or unordered): no correction, use 0.0.
 	   Otherwise it was rounded upward, so 1.0 is subtracted.  */
 	fmovsule %fcc2, ZERO, %f9
 	fsubs	%f1, %f9, %f0
 	/* Clear the result's sign, then OR the input's sign bit back in
 	   (second half runs in the retl delay slot).  Presumably this
 	   keeps the sign of a zero result equal to the sign of x
 	   regardless of rounding mode -- TODO confirm intent.  */
 	fabss	%f0, %f0
 	retl
 	 fors	%f0, SIGN_BIT, %f0
 END (__floorf)
 weak_alias (__floorf, floorf)
--- a/sysdeps/sparc/sparc64/fpu/s_floor.S
+++ b/sysdeps/sparc/sparc64/fpu/s_floor.S
@ -0,0 +1,83 @@
 /* floor function, sparc64 version.
   Copyright (C) 2012 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2012.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 #include <sysdep.h>
 	/* Since changing the rounding mode is extremely expensive, we
 	   try to round down using a method that is rounding mode
 	   agnostic.
 	   We add 2**52 to the value (or subtract it if the initial
 	   value was negative), then take it back out again.
 	   This will clear out the fractional portion of the value.
 	   One of two things will happen for non-whole initial values.
 	   Either the rounding mode will round it up, or it will be
 	   rounded down.  If the value started out whole, it will be
 	   equal after the addition and subtraction.  This means we
 	   can accurately detect with one test (is the rounded value
 	   greater than the original?) whether we need to subtract
 	   another 1.0 to round it down properly.
 	   We pop constants into the FPU registers using the incoming
 	   argument stack slots, since this avoids having to use any
 	   PIC references.  We also thus avoid having to allocate a
 	   register window.
 	   VIS instructions are used to facilitate the formation of
 	   easier constants, and the propagation of the sign bit.  */
 /* TWO_FIFTYTWO and ONE_DOT_ZERO are only the upper 32 bits of the
    double bit patterns; the code shifts them left by 32 to build the
    full 64-bit constants.  ZERO and SIGN_BIT name FP registers that
    the routine initializes itself.  */
 #define TWO_FIFTYTWO	0x43300000		/* 2**52 */
 #define ONE_DOT_ZERO	0x3ff00000		/* 1.0 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 /* double __floor (double x), sparc64.

    In:   x is read directly from %f0.
    Out:  result in %f0.
    Leaf routine: returns with retl and allocates no register window.
    The 8-byte slot at [%sp + STACK_BIAS + 128] is used as scratch to
    move constants from integer registers into FP registers; it is
    reused, so the store/load ordering below matters.
    Writes %o2, %o3, %f0, %f10, %f12, %f14, %f16, %f18, %f20 (as
    doubles), %fcc2 and %fcc3.  */
 ENTRY (__floor)
 	/* Form 2**52 in %o2 and 1.0 in %o3; ZERO = 0.0 and
 	   SIGN_BIT = -0.0 (only the sign bit set).  */
 	sethi	%hi(TWO_FIFTYTWO), %o2
 	sethi	%hi(ONE_DOT_ZERO), %o3
 	fzero	ZERO
 	sllx	%o2, 32, %o2
 	fnegd	ZERO, SIGN_BIT
 	sllx	%o3, 32, %o3
 	/* %f14 = |x|, %f16 = 2**52 (through the scratch slot).  */
 	stx	%o2, [%sp + STACK_BIAS + 128]
 	fabsd	%f0, %f14
 	ldd	[%sp + STACK_BIAS + 128], %f16
 	/* If |x| >= 2**52, or the compare is unordered (NaN), use 0.0 in
 	   place of 2**52 so the add/subtract below leaves x unchanged.  */
 	fcmpd	%fcc3, %f14, %f16
 	fmovduge %fcc3, ZERO, %f16
 	/* SIGN_BIT = sign bit of x; copy that sign onto the 2**52 bias.  */
 	fand	%f0, SIGN_BIT, SIGN_BIT
 	for	%f16, SIGN_BIT, %f16
 	/* %f18 = (x + bias) - bias: x rounded to an integer in whatever
 	   rounding mode is currently active.  */
 	faddd	%f0, %f16, %f18
 	fsubd	%f18, %f16, %f18
 	/* Compare the rounded value with x, and load 1.0 into %f20 as the
 	   candidate correction.  */
 	fcmpd	%fcc2, %f18, %f0
 	stx	%o3, [%sp + STACK_BIAS + 128]
 	ldd	[%sp + STACK_BIAS + 128], %f20
 	/* Rounded value <= x (or unordered): no correction, use 0.0.
 	   Otherwise it was rounded upward, so 1.0 is subtracted.  */
 	fmovdule %fcc2, ZERO, %f20
 	fsubd	%f18, %f20, %f0
 	/* Clear the result's sign, then OR the input's sign bit back in
 	   (second half runs in the retl delay slot).  Presumably this
 	   keeps the sign of a zero result equal to the sign of x
 	   regardless of rounding mode -- TODO confirm intent.  */
 	fabsd	%f0, %f0
 	retl
 	 for	%f0, SIGN_BIT, %f0
 END (__floor)
 weak_alias (__floor, floor)
--- a/sysdeps/sparc/sparc64/fpu/s_floorf.S
+++ b/sysdeps/sparc/sparc64/fpu/s_floorf.S
@ -0,0 +1,81 @@
 /* Float floor function, sparc64 version.
   Copyright (C) 2012 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2012.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 #include <sysdep.h>
 	/* Since changing the rounding mode is extremely expensive, we
 	   try to round down using a method that is rounding mode
 	   agnostic.
 	   We add 2**23 to the value (or subtract it if the initial
 	   value was negative), then take it back out again.
 	   This will clear out the fractional portion of the value.
 	   One of two things will happen for non-whole initial values.
 	   Either the rounding mode will round it up, or it will be
 	   rounded down.  If the value started out whole, it will be
 	   equal after the addition and subtraction.  This means we
 	   can accurately detect with one test (is the rounded value
 	   greater than the original?) whether we need to subtract
 	   another 1.0 to round it down properly.
 	   We pop constants into the FPU registers using the incoming
 	   argument stack slots, since this avoids having to use any
 	   PIC references.  We also thus avoid having to allocate a
 	   register window.
 	   VIS instructions are used to facilitate the formation of
 	   easier constants, and the propagation of the sign bit.  */
 /* TWO_TWENTYTHREE and ONE_DOT_ZERO are complete 32-bit single
    precision bit patterns.  Their low 10 bits are zero, so a lone
    sethi %hi() materializes the whole value.  ZERO and SIGN_BIT name
    FP registers that the routine initializes itself.  */
 #define TWO_TWENTYTHREE	0x4b000000		/* 2**23 */
 #define ONE_DOT_ZERO	0x3f800000		/* 1.0 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 /* float __floorf (float x), sparc64.

    In:   x is read directly from %f1.
    Out:  result in %f0.
    Leaf routine: returns with retl and allocates no register window.
    The 4-byte slot at [%sp + STACK_BIAS + 128] is used as scratch to
    move constants from integer registers into FP registers; it is
    reused, so the store/load ordering below matters.
    Writes %o2, %o3, %f0, %f5, %f9, %f10, %f12, %f14, %f16, %fcc2 and
    %fcc3.  */
 ENTRY (__floorf)
 	/* Form 2**23 in %o2 and 1.0 in %o3; ZERO = 0.0 and
 	   SIGN_BIT = -0.0 (only the sign bit set).  */
 	sethi	%hi(TWO_TWENTYTHREE), %o2
 	sethi	%hi(ONE_DOT_ZERO), %o3
 	fzeros	ZERO
 	fnegs	ZERO, SIGN_BIT
 	/* %f14 = |x|, %f16 = 2**23 (through the scratch slot).  */
 	st	%o2, [%sp + STACK_BIAS + 128]
 	fabss	%f1, %f14
 	ld	[%sp + STACK_BIAS + 128], %f16
 	/* If |x| >= 2**23, or the compare is unordered (NaN), use 0.0 in
 	   place of 2**23 so the add/subtract below leaves x unchanged.  */
 	fcmps	%fcc3, %f14, %f16
 	fmovsuge %fcc3, ZERO, %f16
 	/* SIGN_BIT = sign bit of x; copy that sign onto the 2**23 bias.  */
 	fands	%f1, SIGN_BIT, SIGN_BIT
 	fors	%f16, SIGN_BIT, %f16
 	/* %f5 = (x + bias) - bias: x rounded to an integer in whatever
 	   rounding mode is currently active.  */
 	fadds	%f1, %f16, %f5
 	fsubs	%f5, %f16, %f5
 	/* Compare the rounded value with x, and load 1.0 into %f9 as the
 	   candidate correction.  */
 	fcmps	%fcc2, %f5, %f1
 	st	%o3, [%sp + STACK_BIAS + 128]
 	ld	[%sp + STACK_BIAS + 128], %f9
 	/* Rounded value <= x (or unordered): no correction, use 0.0.
 	   Otherwise it was rounded upward, so 1.0 is subtracted.  */
 	fmovsule %fcc2, ZERO, %f9
 	fsubs	%f5, %f9, %f0
 	/* Clear the result's sign, then OR the input's sign bit back in
 	   (second half runs in the retl delay slot).  Presumably this
 	   keeps the sign of a zero result equal to the sign of x
 	   regardless of rounding mode -- TODO confirm intent.  */
 	fabss	%f0, %f0
 	retl
 	 fors	%f0, SIGN_BIT, %f0
 END (__floorf)
 weak_alias (__floorf, floorf)