Optimize sparc {ceil,floor}{,f} using vis2 'siam' instruction.

* sysdeps/sparc/sparc-ifunc.h (SPARC_ASM_IFUNC2): New macro. (SPARC_ASM_VIS2_IFUNC): Likewise. (SPARC_ASM_VIS3_VIS2_IFUNC): Likewise. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S: Make use of 'siam' instruction. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S: Likewise. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S: Likewise. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S: Likewise. * sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S: Likewise. * sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S: Likewise. * sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S: Likewise. * sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S: Likewise. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S: New file. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S: New file. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S: New file. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S: New file. * sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S: New file. * sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S: New file. * sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S: New file. * sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S: New file. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S: Hook in new VIS2 routines. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S: Likewise. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S: Likewise. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S: Likewise. * sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S: Likewise. * sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S: Likewise. * sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S: Likewise. * sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S: Likewise. * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile: Add new VIS2 routines to libm-sysdep_routines. * sysdeps/sparc/sparc64/fpu/multiarch/Makefile: Likewise.
2024-11-22 21:10:07 +00:00 · 2013-01-14 21:47:29 -08:00 · 2013-01-14 21:47:29 -08:00 · 8b954ab9b8
commit 8b954ab9b8
parent 65a82e3dd5
28 changed files with 670 additions and 223 deletions
--- a/41
+++ b/41
@ -1,5 +1,46 @@
 2013-01-14  David S. Miller  <davem@davemloft.net>
 	* sysdeps/sparc/sparc-ifunc.h (SPARC_ASM_IFUNC2): New macro.
 	(SPARC_ASM_VIS2_IFUNC): Likewise.
 	(SPARC_ASM_VIS3_VIS2_IFUNC): Likewise.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S: Make
 	use of 'siam' instruction.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S:
 	Likewise.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S:
 	Likewise.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S:
 	Likewise.
 	* sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S: Likewise.
 	* sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S: Likewise.
 	* sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S: Likewise.
 	* sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S: Likewise.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S: New
 	file.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S: New
 	file.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S: New
 	file.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S: New
 	file.
 	* sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S: New file.
 	* sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S: New file.
 	* sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S: New file.
 	* sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S: New file.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S: Hook in
 	new VIS2 routines.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S: Likewise.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S: Likewise.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S:
 	Likewise.
 	* sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S: Likewise.
 	* sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S: Likewise.
 	* sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S: Likewise.
 	* sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S: Likewise.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile: Add new VIS2
 	routines to libm-sysdep_routines.
 	* sysdeps/sparc/sparc64/fpu/multiarch/Makefile: Likewise.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile: Add vis3
 	fdim/fdimf to libm-sysdep_routines.
 	* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim-vis3.S: New
--- a/sysdeps/sparc/sparc-ifunc.h
+++ b/sysdeps/sparc/sparc-ifunc.h
@ -51,6 +51,33 @@ ENTRY (__##name)					\
 	 mov	%o1, %o0;				\
 END (__##name)
 /* SPARC_ASM_IFUNC2 (name, m1, f1, m2, f2, dflt) -- SHARED variant.
    Emits __NAME as a gnu_indirect_function resolver choosing between
    two capability-specific implementations and a default.
    The value received in %o0 (presumably the hardware capability
    mask -- confirm against the ifunc caller) is ANDed with M1 first;
    if any bit is set F1 is selected.  Otherwise it is ANDed with M2
    and F2 is selected on a match.  If neither mask matches, DFLT is
    selected.  M1 therefore has priority over M2.
    The %gdop_hix22/%gdop_lox10 value of the chosen symbol is added to
    %o3, the register passed to SETUP_PIC_REG_LEAF, and the sum is
    returned in %o0.
    Registers written: %o0, %o1, %o3 (and %o5, which is handed to
    SETUP_PIC_REG_LEAF).  Local labels used: 8, 9, 10.  */
 #  define SPARC_ASM_IFUNC2(name, m1, f1, m2, f2, dflt)	\
 ENTRY (__##name)					\
 	.type	__##name, @gnu_indirect_function;	\
 	SETUP_PIC_REG_LEAF(o3, o5);			\
 	set	m1, %o1;				\
 	andcc	%o0, %o1, %g0;				\
 	be	8f;					\
 	 nop;						\
 	sethi	%gdop_hix22(f1), %o1;			\
 	xor	%o1, %gdop_lox10(f1), %o1;		\
 	ba	10f;					\
 	 nop;						\
 8:	set	m2, %o1;				\
 	andcc	%o0, %o1, %g0;				\
 	be	9f;					\
 	 nop;						\
 	sethi	%gdop_hix22(f2), %o1;			\
 	xor	%o1, %gdop_lox10(f2), %o1;		\
 	ba	10f;					\
 	 nop;						\
 9:	sethi	%gdop_hix22(dflt), %o1;			\
 	xor	%o1, %gdop_lox10(dflt), %o1;		\
 10:	add	%o3, %o1, %o1;				\
 	retl;						\
 	 mov	%o1, %o0;				\
 END (__##name)
 # else /* SHARED */
 # ifdef __arch64__
@ -82,19 +109,54 @@ ENTRY (__##name)					\
 	 mov	%o1, %o0;				\
 END (__##name)
 /* SPARC_ASM_IFUNC2 (name, m1, f1, m2, f2, dflt) -- non-SHARED variant.
    Emits __NAME as a gnu_indirect_function resolver.  The value
    received in %o0 is ANDed with M1; if any bit is set F1 is
    selected.  Otherwise it is ANDed with M2 and F2 is selected on a
    match.  If neither mask matches, DFLT is selected.
    The chosen symbol is materialized with SET, which is given %g1 as
    its second register argument, and is returned in %o0.
    Registers written: %o0, %o1 (and %g1, which is handed to SET).
    Local labels used: 8, 9, 10.  */
 #  define SPARC_ASM_IFUNC2(name, m1, f1, m2, f2, dflt)	\
 ENTRY (__##name)					\
 	.type	__##name, @gnu_indirect_function;	\
 	set	m1, %o1;				\
 	andcc	%o0, %o1, %g0;				\
 	be	8f;					\
 	 nop;						\
 	SET(f1, %g1, %o1);				\
 	ba	10f;					\
 	 nop;						\
 8:	set	m2, %o1;				\
 	andcc	%o0, %o1, %g0;				\
 	be	9f;					\
 	 nop;						\
 	SET(f2, %g1, %o1);				\
 	ba	10f;					\
 	 nop;						\
 9:	SET(dflt, %g1, %o1);				\
 10:	retl;						\
 	 mov	%o1, %o0;				\
 END (__##name)
 # endif /* SHARED */
 /* Resolver for NAME with a single optional implementation: expands
    SPARC_ASM_IFUNC1 with the HWCAP_SPARC_VIS2 mask, __NAME_vis2 as
    the capability-specific routine and __NAME_generic as the
    fallback.  Unlike the VIS3 macros, this one does not depend on
    HAVE_AS_VIS3_SUPPORT.  */
 #define SPARC_ASM_VIS2_IFUNC(name)			\
 	SPARC_ASM_IFUNC1(name, HWCAP_SPARC_VIS2,	\
 			 __##name##_vis2, __##name##_generic)
 /* VIS3-aware resolver macros.
    With HAVE_AS_VIS3_SUPPORT:
      SPARC_ASM_VIS3_IFUNC      -> __NAME_vis3 if HWCAP_SPARC_VIS3,
 				   else __NAME_generic.
      SPARC_ASM_VIS3_VIS2_IFUNC -> __NAME_vis3 if HWCAP_SPARC_VIS3,
 				   else __NAME_vis2 if HWCAP_SPARC_VIS2,
 				   else __NAME_generic.
    Without it, no __NAME_vis3 symbol is referenced:
      SPARC_ASM_VIS3_IFUNC      -> SPARC_ASM_IFUNC_DFLT with
 				   __NAME_generic (presumably an
 				   unconditional resolver -- its
 				   definition is not shown here).
      SPARC_ASM_VIS3_VIS2_IFUNC -> SPARC_ASM_VIS2_IFUNC.  */
 # ifdef HAVE_AS_VIS3_SUPPORT
 #define SPARC_ASM_VIS3_IFUNC(name)			\
 	SPARC_ASM_IFUNC1(name, HWCAP_SPARC_VIS3,	\
 			 __##name##_vis3, __##name##_generic)
 #define SPARC_ASM_VIS3_VIS2_IFUNC(name)			\
 	SPARC_ASM_IFUNC2(name, HWCAP_SPARC_VIS3,	\
 			 __##name##_vis3,		\
 			 HWCAP_SPARC_VIS2,		\
 			 __##name##_vis2, __##name##_generic)
 # else /* HAVE_AS_VIS3_SUPPORT */
 #define SPARC_ASM_VIS3_IFUNC(name)			\
 	SPARC_ASM_IFUNC_DFLT(name, __##name##_generic)
 #define SPARC_ASM_VIS3_VIS2_IFUNC(name)			\
 	SPARC_ASM_VIS2_IFUNC(name)
 # endif /* HAVE_AS_VIS3_SUPPORT */
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile
@ -1,4 +1,6 @@
 ifeq ($(subdir),math)
 libm-sysdep_routines += s_ceil-vis2 s_ceilf-vis2 \
 			s_floor-vis2 s_floorf-vis2
 ifeq ($(have-as-vis3),yes)
 libm-sysdep_routines += m_copysignf-vis3 m_copysign-vis3 s_ceilf-vis3 \
 			s_ceil-vis3 s_fabs-vis3 s_fabsf-vis3 s_floor-vis3 \
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S
@ -0,0 +1,61 @@
 /* ceil function, sparc32 v9 vis2 version.
   Copyright (C) 2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2013.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 #include <sysdep.h>
 	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
 	   the rounding mode during this routine.
 	   We add 2**52 to the value and then subtract it back out (using
 	   -2**52 instead if the initial value was negative).
 	   This will clear out the fractional portion of the value and,
 	   with suitable 'siam' initiated rounding mode settings, round
 	   the final result in the proper direction.  */
 #define TWO_FIFTYTWO	0x43300000		/* 2**52 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 /* __ceil_vis2 -- double-precision ceil, sparc32 v9 VIS2 variant.
    In:   the argument as two 32-bit halves, %o0 = high word and
 	 %o1 = low word.
    Out:  result in %f0.
    Writes: %o0, %o2, %f0, %f10 (ZERO), %f12 (SIGN_BIT), %f14, %f16,
 	 %f18, %fcc3 and the 8-byte stack slot at [%sp + 72].
    The integer and floating-point instructions are interleaved; the
    comments below describe each step where it completes.  */
 ENTRY (__ceil_vis2)
 	/* %o2 = TWO_FIFTYTWO, the high word of the 2**52 bit pattern (its
 	   low 10 bits are zero, so sethi alone suffices); it is shifted
 	   into the upper half of the register further down.  */
 	sethi	%hi(TWO_FIFTYTWO), %o2
 	/* Combine the two argument halves into one 64-bit value and pass
 	   it through the stack slot into %f0.  */
 	sllx	%o0, 32, %o0
 	or	%o0, %o1, %o0
 	stx	%o0, [%sp + 72]
 	sllx	%o2, 32, %o2
 	/* ZERO = 0.0 here; SIGN_BIT = -0.0 after the fnegd below.  */
 	fzero	ZERO
 	ldd	[%sp + 72], %f0
 	fnegd	ZERO, SIGN_BIT
 	/* Reuse the same stack slot to move 2**52 into %f16.  */
 	stx	%o2, [%sp + 72]
 	/* %f14 = |x|.  */
 	fabsd	%f0, %f14
 	ldd	[%sp + 72], %f16
 	/* If |x| >= 2**52 replace the 2**52 addend by 0.0, so that the
 	   add/subtract below is done with zero instead.  The 'uge'
 	   condition presumably also holds for unordered (NaN) operands
 	   -- confirm against the SPARC V9 FMOVcc definition.  */
 	fcmpd	%fcc3, %f14, %f16
 	fmovduge %fcc3, ZERO, %f16
 	/* SIGN_BIT = just the sign bit of x; give the addend that sign.  */
 	fand	%f0, SIGN_BIT, SIGN_BIT
 	for	%f16, SIGN_BIT, %f16
 	/* Override the rounding mode for the addition.
 	   NOTE(review): (1 << 2) | 2 presumably encodes IM=1, IRND=2
 	   (round toward +infinity) -- confirm against the VIS2 'siam'
 	   mode encoding.  */
 	siam	(1 << 2) | 2
 	faddd	%f0, %f16, %f18
 	/* NOTE(review): (1 << 2) | 0 presumably selects round-to-nearest
 	   for the subtraction -- confirm.  */
 	siam	(1 << 2) | 0
 	fsubd	%f18, %f16, %f18
 	/* NOTE(review): (0 << 2) presumably clears the override so the
 	   caller's rounding mode applies again -- confirm.  */
 	siam	(0 << 2)
 	retl
 	/* Delay slot: OR the saved sign of x into the result.  */
 	 for	%f18, SIGN_BIT, %f0
 END (__ceil_vis2)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S
@ -19,27 +19,21 @@
 #include <sysdep.h>
-	/* Since changing the rounding mode is extremely expensive, we
+	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
-	   try to round up using a method that is rounding mode
+	   the rounding mode during this routine.
 	   agnostic.
 	   We add then subtract (or subtract than add if the initial
 	   value was negative) 2**23 to the value, then subtract it
 	   back out.
-	   This will clear out the fractional portion of the value.
+	   This will clear out the fractional portion of the value and,
-	   One of two things will happen for non-whole initial values.
+	   with suitable 'siam' initiated rounding mode settings, round
-	   Either the rounding mode will round it up, or it will be
+	   the final result in the proper direction.
 	   rounded down.  If the value started out whole, it will be
 	   equal after the addition and subtraction.  This means we
 	   can accurately detect with one test whether we need to add
 	   another 1.0 to round it up properly.
-	   VIS instructions are used to facilitate the formation of
+	   We also use VIS3 moves to avoid using the stack to transfer
-	   easier constants, and the propagation of the sign bit.  */
+	   values between float and integer registers.  */
 #define TWO_FIFTYTWO	0x43300000		/* 2**52 */
 #define ONE_DOT_ZERO	0x3ff00000		/* 1.0 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
@ -47,32 +41,22 @@
 ENTRY (__ceil_vis3)
 	sethi	%hi(TWO_FIFTYTWO), %o2
 	sllx	%o0, 32, %o0
-	sethi	%hi(ONE_DOT_ZERO), %o3
+	sllx	%o2, 32, %o2
 	or	%o0, %o1, %o0
 	movxtod	%o0, %f0
 	sllx	%o2, 32, %o2
 	fzero	ZERO
 	sllx	%o3, 32, %o3
 	fnegd	ZERO, SIGN_BIT
 	movxtod	%o2, %f16
 	fabsd	%f0, %f14
 	fcmpd	%fcc3, %f14, %f16
 	fmovduge %fcc3, ZERO, %f16
 	fand	%f0, SIGN_BIT, SIGN_BIT
 	for	%f16, SIGN_BIT, %f16
 	siam	(1 << 2) | 2
 	faddd	%f0, %f16, %f18
 	siam	(1 << 2) | 0
 	fsubd	%f18, %f16, %f18
-	fcmpd	%fcc2, %f18, %f0
+	siam	(0 << 2)
 	movxtod	%o3, %f20
 	fmovduge %fcc2, ZERO, %f20
 	faddd	%f18, %f20, %f0
 	fabsd	%f0, %f0
 	retl
-	 for	%f0, SIGN_BIT, %f0
+	 for	%f18, SIGN_BIT, %f0
 END (__ceil_vis3)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S
@ -1,7 +1,7 @@
 #include <sparc-ifunc.h>
 #include <math_ldbl_opt.h>
-SPARC_ASM_VIS3_IFUNC(ceil)
+SPARC_ASM_VIS3_VIS2_IFUNC(ceil)
 weak_alias (__ceil, ceil)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S
@ -0,0 +1,58 @@
 /* Float ceil function, sparc32 v9 vis2 version.
   Copyright (C) 2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2013.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 #include <sysdep.h>
 	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
 	   the rounding mode during this routine.
 	   We add 2**23 to the value and then subtract it back out (using
 	   -2**23 instead if the initial value was negative).
 	   This will clear out the fractional portion of the value and,
 	   with suitable 'siam' initiated rounding mode settings, round
 	   the final result in the proper direction.  */
 #define TWO_TWENTYTHREE	0x4b000000		/* 2**23 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 /* __ceilf_vis2 -- single-precision ceil, sparc32 v9 VIS2 variant.
    In:   the argument's 32-bit pattern in %o0.
    Out:  result in %f0.
    Writes: %o2, %f0, %f1, %f10 (ZERO), %f12 (SIGN_BIT), %f14, %f16,
 	 %fcc3 and the 4-byte stack slot at [%sp + 68].  */
 ENTRY (__ceilf_vis2)
 	/* Pass the argument through the stack slot into %f0.  */
 	st	%o0, [%sp + 68]
 	/* %o2 = TWO_TWENTYTHREE, the bit pattern of 2**23 (its low 10
 	   bits are zero, so sethi alone builds the whole constant).  */
 	sethi	%hi(TWO_TWENTYTHREE), %o2
 	/* ZERO = 0.0 here; SIGN_BIT = -0.0 after the fnegs below.  */
 	fzeros	ZERO
 	ld	[%sp + 68], %f0
 	fnegs	ZERO, SIGN_BIT
 	/* Reuse the same stack slot to move 2**23 into %f16.  */
 	st	%o2, [%sp + 68]
 	/* %f14 = |x|.  */
 	fabss	%f0, %f14
 	ld	[%sp + 68], %f16
 	/* If |x| >= 2**23 replace the 2**23 addend by 0.0, so that the
 	   add/subtract below is done with zero instead.  The 'uge'
 	   condition presumably also holds for unordered (NaN) operands
 	   -- confirm against the SPARC V9 FMOVcc definition.  */
 	fcmps	%fcc3, %f14, %f16
 	fmovsuge %fcc3, ZERO, %f16
 	/* SIGN_BIT = just the sign bit of x; give the addend that sign.  */
 	fands	%f0, SIGN_BIT, SIGN_BIT
 	fors	%f16, SIGN_BIT, %f16
 	/* Override the rounding mode for the addition.
 	   NOTE(review): (1 << 2) | 2 presumably encodes IM=1, IRND=2
 	   (round toward +infinity) -- confirm against the VIS2 'siam'
 	   mode encoding.  */
 	siam	(1 << 2) | 2
 	fadds	%f0, %f16, %f1
 	/* NOTE(review): (1 << 2) | 0 presumably selects round-to-nearest
 	   for the subtraction -- confirm.  */
 	siam	(1 << 2) | 0
 	fsubs	%f1, %f16, %f1
 	/* NOTE(review): (0 << 2) presumably clears the override so the
 	   caller's rounding mode applies again -- confirm.  */
 	siam	(0 << 2)
 	retl
 	/* Delay slot: OR the saved sign of x into the result.  */
 	 fors	%f1, SIGN_BIT, %f0
 END (__ceilf_vis2)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S
@ -19,27 +19,21 @@
 #include <sysdep.h>
-	/* Since changing the rounding mode is extremely expensive, we
+	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
-	   try to round up using a method that is rounding mode
+	   the rounding mode during this routine.
 	   agnostic.
 	   We add then subtract (or subtract than add if the initial
 	   value was negative) 2**23 to the value, then subtract it
 	   back out.
-	   This will clear out the fractional portion of the value.
+	   This will clear out the fractional portion of the value and,
-	   One of two things will happen for non-whole initial values.
+	   with suitable 'siam' initiated rounding mode settings, round
-	   Either the rounding mode will round it up, or it will be
+	   the final result in the proper direction.
 	   rounded down.  If the value started out whole, it will be
 	   equal after the addition and subtraction.  This means we
 	   can accurately detect with one test whether we need to add
 	   another 1.0 to round it up properly.
-	   VIS instructions are used to facilitate the formation of
+	   We also use VIS3 moves to avoid using the stack to transfer
-	   easier constants, and the propagation of the sign bit.  */
+	   values between float and integer registers.  */
 #define TWO_TWENTYTHREE	0x4b000000		/* 2**23 */
 #define ONE_DOT_ZERO	0x3f800000		/* 1.0 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
@ -47,28 +41,19 @@
 ENTRY (__ceilf_vis3)
 	movwtos	%o0, %f0
 	sethi	%hi(TWO_TWENTYTHREE), %o2
 	sethi	%hi(ONE_DOT_ZERO), %o3
 	fzeros	ZERO
 	fnegs	ZERO, SIGN_BIT
 	movwtos	%o2, %f16
 	fabss	%f0, %f14
 	fcmps	%fcc3, %f14, %f16
 	fmovsuge %fcc3, ZERO, %f16
 	fands	%f0, SIGN_BIT, SIGN_BIT
 	fors	%f16, SIGN_BIT, %f16
 	siam	(1 << 2) | 2
 	fadds	%f0, %f16, %f1
 	siam	(1 << 2) | 0
 	fsubs	%f1, %f16, %f1
-	fcmps	%fcc2, %f1, %f0
+	siam	(0 << 2)
 	movwtos	%o3, %f9
 	fmovsuge %fcc2, ZERO, %f9
 	fadds	%f1, %f9, %f0
 	fabss	%f0, %f0
 	retl
-	 fors	%f0, SIGN_BIT, %f0
+	 fors	%f1, SIGN_BIT, %f0
 END (__ceilf_vis3)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S
@ -1,6 +1,6 @@
 #include <sparc-ifunc.h>
-SPARC_ASM_VIS3_IFUNC(ceilf)
+SPARC_ASM_VIS3_VIS2_IFUNC(ceilf)
 weak_alias (__ceilf, ceilf)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S
@ -0,0 +1,61 @@
 /* floor function, sparc32 v9 vis2 version.
   Copyright (C) 2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2013.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 #include <sysdep.h>
 	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
 	   the rounding mode during this routine.
 	   We add 2**52 to the value and then subtract it back out (using
 	   -2**52 instead if the initial value was negative).
 	   This will clear out the fractional portion of the value and,
 	   with suitable 'siam' initiated rounding mode settings, round
 	   the final result in the proper direction.  */
 #define TWO_FIFTYTWO	0x43300000		/* 2**52 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 /* __floor_vis2 -- double-precision floor, sparc32 v9 VIS2 variant.
    In:   the argument as two 32-bit halves, %o0 = high word and
 	 %o1 = low word.
    Out:  result in %f0.
    Writes: %o0, %o2, %f0, %f10 (ZERO), %f12 (SIGN_BIT), %f14, %f16,
 	 %f18, %fcc3 and the 8-byte stack slot at [%sp + 72].
    The integer and floating-point instructions are interleaved; the
    comments below describe each step where it completes.  */
 ENTRY (__floor_vis2)
 	/* %o2 = TWO_FIFTYTWO, the high word of the 2**52 bit pattern (its
 	   low 10 bits are zero, so sethi alone suffices); it is shifted
 	   into the upper half of the register further down.  */
 	sethi	%hi(TWO_FIFTYTWO), %o2
 	/* Combine the two argument halves into one 64-bit value and pass
 	   it through the stack slot into %f0.  */
 	sllx	%o0, 32, %o0
 	or	%o0, %o1, %o0
 	stx	%o0, [%sp + 72]
 	sllx	%o2, 32, %o2
 	/* ZERO = 0.0 here; SIGN_BIT = -0.0 after the fnegd below.  */
 	fzero	ZERO
 	ldd	[%sp + 72], %f0
 	fnegd	ZERO, SIGN_BIT
 	/* Reuse the same stack slot to move 2**52 into %f16.  */
 	stx	%o2, [%sp + 72]
 	/* %f14 = |x|.  */
 	fabsd	%f0, %f14
 	ldd	[%sp + 72], %f16
 	/* If |x| >= 2**52 replace the 2**52 addend by 0.0, so that the
 	   add/subtract below is done with zero instead.  The 'uge'
 	   condition presumably also holds for unordered (NaN) operands
 	   -- confirm against the SPARC V9 FMOVcc definition.  */
 	fcmpd	%fcc3, %f14, %f16
 	fmovduge %fcc3, ZERO, %f16
 	/* SIGN_BIT = just the sign bit of x; give the addend that sign.  */
 	fand	%f0, SIGN_BIT, SIGN_BIT
 	for	%f16, SIGN_BIT, %f16
 	/* Override the rounding mode for the addition.
 	   NOTE(review): (1 << 2) | 3 presumably encodes IM=1, IRND=3
 	   (round toward -infinity) -- confirm against the VIS2 'siam'
 	   mode encoding.  */
 	siam	(1 << 2) | 3
 	faddd	%f0, %f16, %f18
 	/* NOTE(review): (1 << 2) | 0 presumably selects round-to-nearest
 	   for the subtraction -- confirm.  */
 	siam	(1 << 2) | 0
 	fsubd	%f18, %f16, %f18
 	/* NOTE(review): (0 << 2) presumably clears the override so the
 	   caller's rounding mode applies again -- confirm.  */
 	siam	(0 << 2)
 	retl
 	/* Delay slot: OR the saved sign of x into the result.  */
 	 for	%f18, SIGN_BIT, %f0
 END (__floor_vis2)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S
@ -19,27 +19,21 @@
 #include <sysdep.h>
-	/* Since changing the rounding mode is extremely expensive, we
+	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
-	   try to round up using a method that is rounding mode
+	   the rounding mode during this routine.
 	   agnostic.
 	   We add then subtract (or subtract than add if the initial
 	   value was negative) 2**23 to the value, then subtract it
 	   back out.
-	   This will clear out the fractional portion of the value.
+	   This will clear out the fractional portion of the value and,
-	   One of two things will happen for non-whole initial values.
+	   with suitable 'siam' initiated rounding mode settings, round
-	   Either the rounding mode will round it up, or it will be
+	   the final result in the proper direction.
 	   rounded down.  If the value started out whole, it will be
 	   equal after the addition and subtraction.  This means we
 	   can accurately detect with one test whether we need to add
 	   another 1.0 to round it up properly.
-	   VIS instructions are used to facilitate the formation of
+	   We also use VIS3 moves to avoid using the stack to transfer
-	   easier constants, and the propagation of the sign bit.  */
+	   values between float and integer registers.  */
 #define TWO_FIFTYTWO	0x43300000		/* 2**52 */
 #define ONE_DOT_ZERO	0x3ff00000		/* 1.0 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
@ -47,32 +41,22 @@
 ENTRY (__floor_vis3)
 	sethi	%hi(TWO_FIFTYTWO), %o2
 	sllx	%o0, 32, %o0
-	sethi	%hi(ONE_DOT_ZERO), %o3
+	sllx	%o2, 32, %o2
 	or	%o0, %o1, %o0
 	movxtod	%o0, %f0
 	sllx	%o2, 32, %o2
 	fzero	ZERO
 	sllx	%o3, 32, %o3
 	fnegd	ZERO, SIGN_BIT
 	movxtod	%o2, %f16
 	fabsd	%f0, %f14
 	fcmpd	%fcc3, %f14, %f16
 	fmovduge %fcc3, ZERO, %f16
 	fand	%f0, SIGN_BIT, SIGN_BIT
 	for	%f16, SIGN_BIT, %f16
 	siam	(1 << 2) | 3
 	faddd	%f0, %f16, %f18
 	siam	(1 << 2) | 0
 	fsubd	%f18, %f16, %f18
-	fcmpd	%fcc2, %f18, %f0
+	siam	(0 << 2)
 	movxtod	%o3, %f20
 	fmovdule %fcc2, ZERO, %f20
 	fsubd	%f18, %f20, %f0
 	fabsd	%f0, %f0
 	retl
-	 for	%f0, SIGN_BIT, %f0
+	 for	%f18, SIGN_BIT, %f0
 END (__floor_vis3)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S
@ -1,7 +1,7 @@
 #include <sparc-ifunc.h>
 #include <math_ldbl_opt.h>
-SPARC_ASM_VIS3_IFUNC(floor)
+SPARC_ASM_VIS3_VIS2_IFUNC(floor)
 weak_alias (__floor, floor)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S
@ -0,0 +1,58 @@
 /* Float floor function, sparc32 v9 vis2 version.
   Copyright (C) 2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2013.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 #include <sysdep.h>
 	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
 	   the rounding mode during this routine.
 	   We add 2**23 to the value and then subtract it back out (using
 	   -2**23 instead if the initial value was negative).
 	   This will clear out the fractional portion of the value and,
 	   with suitable 'siam' initiated rounding mode settings, round
 	   the final result in the proper direction.  */
 #define TWO_TWENTYTHREE	0x4b000000		/* 2**23 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 /* __floorf_vis2 -- single-precision floor, sparc32 v9 VIS2 variant.
    In:   the argument's 32-bit pattern in %o0.
    Out:  result in %f0.
    Writes: %o2, %f0, %f1, %f10 (ZERO), %f12 (SIGN_BIT), %f14, %f16,
 	 %fcc3 and the 4-byte stack slot at [%sp + 68].  */
 ENTRY (__floorf_vis2)
 	/* Pass the argument through the stack slot into %f0.  */
 	st	%o0, [%sp + 68]
 	/* %o2 = TWO_TWENTYTHREE, the bit pattern of 2**23 (its low 10
 	   bits are zero, so sethi alone builds the whole constant).  */
 	sethi	%hi(TWO_TWENTYTHREE), %o2
 	/* ZERO = 0.0 here; SIGN_BIT = -0.0 after the fnegs below.  */
 	fzeros	ZERO
 	ld	[%sp + 68], %f0
 	fnegs	ZERO, SIGN_BIT
 	/* Reuse the same stack slot to move 2**23 into %f16.  */
 	st	%o2, [%sp + 68]
 	/* %f14 = |x|.  */
 	fabss	%f0, %f14
 	ld	[%sp + 68], %f16
 	/* If |x| >= 2**23 replace the 2**23 addend by 0.0, so that the
 	   add/subtract below is done with zero instead.  The 'uge'
 	   condition presumably also holds for unordered (NaN) operands
 	   -- confirm against the SPARC V9 FMOVcc definition.  */
 	fcmps	%fcc3, %f14, %f16
 	fmovsuge %fcc3, ZERO, %f16
 	/* SIGN_BIT = just the sign bit of x; give the addend that sign.  */
 	fands	%f0, SIGN_BIT, SIGN_BIT
 	fors	%f16, SIGN_BIT, %f16
 	/* Override the rounding mode for the addition.
 	   NOTE(review): (1 << 2) | 3 presumably encodes IM=1, IRND=3
 	   (round toward -infinity) -- confirm against the VIS2 'siam'
 	   mode encoding.  */
 	siam	(1 << 2) | 3
 	fadds	%f0, %f16, %f1
 	/* NOTE(review): (1 << 2) | 0 presumably selects round-to-nearest
 	   for the subtraction -- confirm.  */
 	siam	(1 << 2) | 0
 	fsubs	%f1, %f16, %f1
 	/* NOTE(review): (0 << 2) presumably clears the override so the
 	   caller's rounding mode applies again -- confirm.  */
 	siam	(0 << 2)
 	retl
 	/* Delay slot: OR the saved sign of x into the result.  */
 	 fors	%f1, SIGN_BIT, %f0
 END (__floorf_vis2)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S
@ -19,27 +19,21 @@
 #include <sysdep.h>
-	/* Since changing the rounding mode is extremely expensive, we
+	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
-	   try to round up using a method that is rounding mode
+	   the rounding mode during this routine.
 	   agnostic.
 	   We add then subtract (or subtract than add if the initial
 	   value was negative) 2**23 to the value, then subtract it
 	   back out.
-	   This will clear out the fractional portion of the value.
+	   This will clear out the fractional portion of the value and,
-	   One of two things will happen for non-whole initial values.
+	   with suitable 'siam' initiated rounding mode settings, round
-	   Either the rounding mode will round it up, or it will be
+	   the final result in the proper direction.
 	   rounded down.  If the value started out whole, it will be
 	   equal after the addition and subtraction.  This means we
 	   can accurately detect with one test whether we need to add
 	   another 1.0 to round it up properly.
-	   VIS instructions are used to facilitate the formation of
+	   We also use VIS3 moves to avoid using the stack to transfer
-	   easier constants, and the propagation of the sign bit.  */
+	   values between float and integer registers.  */
 #define TWO_TWENTYTHREE	0x4b000000		/* 2**23 */
 #define ONE_DOT_ZERO	0x3f800000		/* 1.0 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
@ -47,28 +41,19 @@
 ENTRY (__floorf_vis3)
 	movwtos	%o0, %f0
 	sethi	%hi(TWO_TWENTYTHREE), %o2
 	sethi	%hi(ONE_DOT_ZERO), %o3
 	fzeros	ZERO
 	fnegs	ZERO, SIGN_BIT
 	movwtos	%o2, %f16
 	fabss	%f0, %f14
 	fcmps	%fcc3, %f14, %f16
 	fmovsuge %fcc3, ZERO, %f16
 	fands	%f0, SIGN_BIT, SIGN_BIT
 	fors	%f16, SIGN_BIT, %f16
 	siam	(1 << 2) | 3
 	fadds	%f0, %f16, %f1
 	siam	(1 << 2) | 0
 	fsubs	%f1, %f16, %f1
-	fcmps	%fcc2, %f1, %f0
+	siam	(0 << 2)
 	movwtos	%o3, %f9
 	fmovsule %fcc2, ZERO, %f9
 	fsubs	%f1, %f9, %f0
 	fabss	%f0, %f0
 	retl
-	 fors	%f0, SIGN_BIT, %f0
+	 fors	%f1, SIGN_BIT, %f0
 END (__floorf_vis3)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S
@ -1,6 +1,6 @@
 #include <sparc-ifunc.h>
-SPARC_ASM_VIS3_IFUNC(floorf)
+SPARC_ASM_VIS3_VIS2_IFUNC(floorf)
 weak_alias (__floorf, floorf)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/Makefile
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/Makefile
@ -1,4 +1,6 @@
 ifeq ($(subdir),math)
 libm-sysdep_routines += s_ceil-vis2 s_ceilf-vis2 \
 			s_floor-vis2 s_floorf-vis2
 ifeq ($(have-as-vis3),yes)
 libm-sysdep_routines += m_signbitf-vis3 m_signbit-vis3 s_ceilf-vis3 \
 			s_ceil-vis3 m_finitef-vis3 m_finite-vis3 \
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S
@ -0,0 +1,57 @@
 /* ceil function, sparc64 vis2 version.
   Copyright (C) 2012-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2012.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 #include <sysdep.h>
 	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
 	   the rounding mode during this routine.
 	   We add 2**52 to the value and then subtract it back out (using
 	   -2**52 instead if the initial value was negative).
 	   This will clear out the fractional portion of the value and,
 	   with suitable 'siam' initiated rounding mode settings, round
 	   the final result in the proper direction.  */
 #define TWO_FIFTYTWO	0x43300000		/* 2**52 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 /* __ceil_vis2 -- double-precision ceil, sparc64 VIS2 variant.
    In:   the argument is read from %f0.
    Out:  result in %f0.
    Writes: %o2, %f0, %f10 (ZERO), %f12 (SIGN_BIT), %f14, %f16, %f18,
 	 %fcc3 and the 8-byte stack slot at
 	 [%sp + STACK_BIAS + 128].  */
 ENTRY (__ceil_vis2)
 	/* %o2 = TWO_FIFTYTWO, the high word of the 2**52 bit pattern (its
 	   low 10 bits are zero, so sethi alone suffices); the sllx below
 	   moves it into the upper half of the register.  */
 	sethi	%hi(TWO_FIFTYTWO), %o2
 	/* ZERO = 0.0 here; SIGN_BIT = -0.0 after the fnegd below.  */
 	fzero	ZERO
 	sllx	%o2, 32, %o2
 	fnegd	ZERO, SIGN_BIT
 	/* Move 2**52 into %f16 through the stack slot.  */
 	stx	%o2, [%sp + STACK_BIAS + 128]
 	/* %f14 = |x|.  */
 	fabsd	%f0, %f14
 	ldd	[%sp + STACK_BIAS + 128], %f16
 	/* If |x| >= 2**52 replace the 2**52 addend by 0.0, so that the
 	   add/subtract below is done with zero instead.  The 'uge'
 	   condition presumably also holds for unordered (NaN) operands
 	   -- confirm against the SPARC V9 FMOVcc definition.  */
 	fcmpd	%fcc3, %f14, %f16
 	fmovduge %fcc3, ZERO, %f16
 	/* SIGN_BIT = just the sign bit of x; give the addend that sign.  */
 	fand	%f0, SIGN_BIT, SIGN_BIT
 	for	%f16, SIGN_BIT, %f16
 	/* Override the rounding mode for the addition.
 	   NOTE(review): (1 << 2) | 2 presumably encodes IM=1, IRND=2
 	   (round toward +infinity) -- confirm against the VIS2 'siam'
 	   mode encoding.  */
 	siam	(1 << 2) | 2
 	faddd	%f0, %f16, %f18
 	/* NOTE(review): (1 << 2) | 0 presumably selects round-to-nearest
 	   for the subtraction -- confirm.  */
 	siam	(1 << 2) | 0
 	fsubd	%f18, %f16, %f18
 	/* NOTE(review): (0 << 2) presumably clears the override so the
 	   caller's rounding mode applies again -- confirm.  */
 	siam	(0 << 2)
 	retl
 	/* Delay slot: OR the saved sign of x into the result.  */
 	 for	%f18, SIGN_BIT, %f0
 END (__ceil_vis2)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S
@ -19,57 +19,41 @@
 #include <sysdep.h>
-	/* Since changing the rounding mode is extremely expensive, we
+	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
-	   try to round up using a method that is rounding mode
+	   the rounding mode during this routine.
 	   agnostic.
 	   We add then subtract (or subtract than add if the initial
 	   value was negative) 2**23 to the value, then subtract it
 	   back out.
-	   This will clear out the fractional portion of the value.
+	   This will clear out the fractional portion of the value and,
-	   One of two things will happen for non-whole initial values.
+	   with suitable 'siam' initiated rounding mode settings, round
-	   Either the rounding mode will round it up, or it will be
+	   the final result in the proper direction.
 	   rounded down.  If the value started out whole, it will be
 	   equal after the addition and subtraction.  This means we
 	   can accurately detect with one test whether we need to add
 	   another 1.0 to round it up properly.
-	   VIS instructions are used to facilitate the formation of
+	   We also use VIS3 moves to avoid using the stack to transfer
-	   easier constants, and the propagation of the sign bit.  */
+	   values between float and integer registers.  */
 #define TWO_FIFTYTWO	0x43300000		/* 2**52 */
 #define ONE_DOT_ZERO	0x3ff00000		/* 1.0 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 ENTRY (__ceil_vis3)
 	sethi	%hi(TWO_FIFTYTWO), %o2
 	sethi	%hi(ONE_DOT_ZERO), %o3
 	fzero	ZERO
 	sllx	%o2, 32, %o2
 	fnegd	ZERO, SIGN_BIT
 	sllx	%o3, 32, %o3
 	movxtod	%o2, %f16
 	fabsd	%f0, %f14
 	fcmpd	%fcc3, %f14, %f16
 	fmovduge %fcc3, ZERO, %f16
 	fand	%f0, SIGN_BIT, SIGN_BIT
 	for	%f16, SIGN_BIT, %f16
 	siam	(1 << 2) | 2
 	faddd	%f0, %f16, %f18
 	siam	(1 << 2) | 0
 	fsubd	%f18, %f16, %f18
-	fcmpd	%fcc2, %f18, %f0
+	siam	(0 << 2)
 	movxtod	%o3, %f20
 	fmovduge %fcc2, ZERO, %f20
 	faddd	%f18, %f20, %f0
 	fabsd	%f0, %f0
 	retl
-	 for	%f0, SIGN_BIT, %f0
+	 for	%f18, SIGN_BIT, %f0
 END (__ceil_vis3)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S
@ -1,6 +1,6 @@
 #include <sparc-ifunc.h>
-SPARC_ASM_VIS3_IFUNC(ceil)
+SPARC_ASM_VIS3_VIS2_IFUNC(ceil)
 weak_alias (__ceil, ceil)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S
@ -0,0 +1,56 @@
 /* Float ceil function, sparc64 vis2 version.
   Copyright (C) 2012-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2012.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 #include <sysdep.h>
 	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
 	   the rounding mode during this routine.
 	   We add 2**23 to the value and then subtract it back out (using
 	   -2**23 instead if the initial value was negative).
 	   This will clear out the fractional portion of the value and,
 	   with suitable 'siam' initiated rounding mode settings, round
 	   the final result in the proper direction.  */
 #define TWO_TWENTYTHREE	0x4b000000		/* 2**23 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 /* __ceilf_vis2 -- single-precision ceil, sparc64 VIS2 variant.
    In:   the argument is read from %f1.
    Out:  result in %f0.
    Writes: %o2, %f0, %f5, %f10 (ZERO), %f12 (SIGN_BIT), %f14, %f16,
 	 %fcc3 and the 4-byte stack slot at
 	 [%sp + STACK_BIAS + 128].  */
 ENTRY (__ceilf_vis2)
 	/* %o2 = TWO_TWENTYTHREE, the bit pattern of 2**23 (its low 10
 	   bits are zero, so sethi alone builds the whole constant).  */
 	sethi	%hi(TWO_TWENTYTHREE), %o2
 	/* ZERO = 0.0; SIGN_BIT = -0.0.  */
 	fzeros	ZERO
 	fnegs	ZERO, SIGN_BIT
 	/* Move 2**23 into %f16 through the stack slot.  */
 	st	%o2, [%sp + STACK_BIAS + 128]
 	/* %f14 = |x|.  */
 	fabss	%f1, %f14
 	ld	[%sp + STACK_BIAS + 128], %f16
 	/* If |x| >= 2**23 replace the 2**23 addend by 0.0, so that the
 	   add/subtract below is done with zero instead.  The 'uge'
 	   condition presumably also holds for unordered (NaN) operands
 	   -- confirm against the SPARC V9 FMOVcc definition.  */
 	fcmps	%fcc3, %f14, %f16
 	fmovsuge %fcc3, ZERO, %f16
 	/* SIGN_BIT = just the sign bit of x; give the addend that sign.  */
 	fands	%f1, SIGN_BIT, SIGN_BIT
 	fors	%f16, SIGN_BIT, %f16
 	/* Override the rounding mode for the addition.
 	   NOTE(review): (1 << 2) | 2 presumably encodes IM=1, IRND=2
 	   (round toward +infinity) -- confirm against the VIS2 'siam'
 	   mode encoding.  */
 	siam	(1 << 2) | 2
 	fadds	%f1, %f16, %f5
 	/* NOTE(review): (1 << 2) | 0 presumably selects round-to-nearest
 	   for the subtraction -- confirm.  */
 	siam	(1 << 2) | 0
 	fsubs	%f5, %f16, %f5
 	/* NOTE(review): (0 << 2) presumably clears the override so the
 	   caller's rounding mode applies again -- confirm.  */
 	siam	(0 << 2)
 	retl
 	/* Delay slot: OR the saved sign of x into the result.  */
 	 fors	%f5, SIGN_BIT, %f0
 END (__ceilf_vis2)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S
@ -19,55 +19,40 @@
 #include <sysdep.h>
-	/* Since changing the rounding mode is extremely expensive, we
+	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
-	   try to round up using a method that is rounding mode
+	   the rounding mode during this routine.
 	   agnostic.
 	   We add then subtract (or subtract than add if the initial
 	   value was negative) 2**23 to the value, then subtract it
 	   back out.
-	   This will clear out the fractional portion of the value.
+	   This will clear out the fractional portion of the value and,
-	   One of two things will happen for non-whole initial values.
+	   with suitable 'siam' initiated rounding mode settings, round
-	   Either the rounding mode will round it up, or it will be
+	   the final result in the proper direction.
 	   rounded down.  If the value started out whole, it will be
 	   equal after the addition and subtraction.  This means we
 	   can accurately detect with one test whether we need to add
 	   another 1.0 to round it up properly.
-	   VIS instructions are used to facilitate the formation of
+	   We also use VIS3 moves to avoid using the stack to transfer
-	   easier constants, and the propagation of the sign bit.  */
+	   values between float and integer registers.  */
 #define TWO_TWENTYTHREE	0x4b000000		/* 2**23 */
 #define ONE_DOT_ZERO	0x3f800000		/* 1.0 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 ENTRY (__ceilf_vis3)
 	sethi	%hi(TWO_TWENTYTHREE), %o2
 	sethi	%hi(ONE_DOT_ZERO), %o3
 	fzeros	ZERO
 	fnegs	ZERO, SIGN_BIT
 	movwtos	%o2, %f16
 	fabss	%f1, %f14
 	fcmps	%fcc3, %f14, %f16
 	fmovsuge %fcc3, ZERO, %f16
 	fands	%f1, SIGN_BIT, SIGN_BIT
 	fors	%f16, SIGN_BIT, %f16
 	siam	(1 << 2) | 2
 	fadds	%f1, %f16, %f5
 	siam	(1 << 2) | 0
 	fsubs	%f5, %f16, %f5
-	fcmps	%fcc2, %f5, %f1
+	siam	(0 << 2)
 	movwtos	%o3, %f9
 	fmovsuge %fcc2, ZERO, %f9
 	fadds	%f5, %f9, %f0
 	fabss	%f0, %f0
 	retl
-	 fors	%f0, SIGN_BIT, %f0
+	 fors	%f5, SIGN_BIT, %f0
 END (__ceilf_vis3)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S
@ -1,6 +1,6 @@
 #include <sparc-ifunc.h>
-SPARC_ASM_VIS3_IFUNC(ceilf)
+SPARC_ASM_VIS3_VIS2_IFUNC(ceilf)
 weak_alias (__ceilf, ceilf)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S
@ -0,0 +1,57 @@
 /* floor function, sparc64 vis2 version.
   Copyright (C) 2012-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2012.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 #include <sysdep.h>
 	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
 	   the rounding mode during this routine.
 	   We add 2**52 to the value and then subtract it back out (or
 	   subtract then add, if the initial value was negative).
 	   This will clear out the fractional portion of the value and,
 	   with suitable 'siam' initiated rounding mode settings, round
 	   the final result in the proper direction.  */
 #define TWO_FIFTYTWO	0x43300000		/* 2**52 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 /* __floor_vis2: reads its double-precision operand from %f0 and leaves
    the result in %f0.  Scratch state written here: %o2, ZERO (%f10),
    SIGN_BIT (%f12), %f14, %f16, %f18, %fcc3 and one doubleword of memory
    at [%sp + STACK_BIAS + 128].  */
 ENTRY (__floor_vis2)
 	/* Build the 64-bit pattern of 2**52 in %o2: sethi supplies the high
 	   word 0x43300000 and sllx moves it into the upper 32 bits.
 	   Interleaved with that, ZERO = 0.0 and SIGN_BIT = -0.0 (only the
 	   sign bit set).  */
 	sethi	%hi(TWO_FIFTYTWO), %o2
 	fzero	ZERO
 	sllx	%o2, 32, %o2
 	fnegd	ZERO, SIGN_BIT
 	/* Bounce the constant through memory to get it from the integer
 	   register into %f16, interleaved with computing %f14 = |x|.
 	   NOTE(review): presumably this stack slot is one a leaf routine
 	   may scribble on -- confirm against the ABI.  */
 	stx	%o2, [%sp + STACK_BIAS + 128]
 	fabsd	%f0, %f14
 	ldd	[%sp + STACK_BIAS + 128], %f16
 	/* Compare |x| with 2**52.  On the 'uge' condition (unordered, or
 	   greater-or-equal) replace the constant with 0.0 so that the
 	   add/subtract below leaves such inputs unchanged.  */
 	fcmpd	%fcc3, %f14, %f16
 	fmovduge %fcc3, ZERO, %f16
 	/* SIGN_BIT becomes the sign bit of x; copy that sign onto the
 	   constant so it is added with the same sign as the input.  */
 	fand	%f0, SIGN_BIT, SIGN_BIT
 	for	%f16, SIGN_BIT, %f16
 	/* Override the rounding mode just for the add.  NOTE(review): bit 2
 	   looks like the override enable and the low two bits the rounding
 	   direction, with 3 presumably meaning toward -infinity -- confirm
 	   against the VIS2 'siam' definition.  */
 	siam	(1 << 2) | 3
 	faddd	%f0, %f16, %f18
 	/* Switch the override to rounding field 0 while the constant is
 	   subtracted back out.  */
 	siam	(1 << 2) | 0
 	fsubd	%f18, %f16, %f18
 	/* Drop the override (bit 2 cleared) before returning.  */
 	siam	(0 << 2)
 	/* The instruction after retl sits in its delay slot: OR the input's
 	   sign bit back into the result while moving it to %f0.  */
 	retl
 	 for	%f18, SIGN_BIT, %f0
 END (__floor_vis2)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S
@ -19,57 +19,41 @@
 #include <sysdep.h>
-	/* Since changing the rounding mode is extremely expensive, we
+	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
-	   try to round up using a method that is rounding mode
+	   the rounding mode during this routine.
 	   agnostic.
 	   We add then subtract (or subtract than add if the initial
 	   value was negative) 2**23 to the value, then subtract it
 	   back out.
-	   This will clear out the fractional portion of the value.
+	   This will clear out the fractional portion of the value and,
-	   One of two things will happen for non-whole initial values.
+	   with suitable 'siam' initiated rounding mode settings, round
-	   Either the rounding mode will round it up, or it will be
+	   the final result in the proper direction.
 	   rounded down.  If the value started out whole, it will be
 	   equal after the addition and subtraction.  This means we
 	   can accurately detect with one test whether we need to add
 	   another 1.0 to round it up properly.
-	   VIS instructions are used to facilitate the formation of
+	   We also use VIS3 moves to avoid using the stack to transfer
-	   easier constants, and the propagation of the sign bit.  */
+	   values between float and integer registers.  */
 #define TWO_FIFTYTWO	0x43300000		/* 2**52 */
 #define ONE_DOT_ZERO	0x3ff00000		/* 1.0 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 ENTRY (__floor_vis3)
 	sethi	%hi(TWO_FIFTYTWO), %o2
 	sethi	%hi(ONE_DOT_ZERO), %o3
 	fzero	ZERO
 	sllx	%o2, 32, %o2
 	fnegd	ZERO, SIGN_BIT
 	sllx	%o3, 32, %o3
 	movxtod	%o2, %f16
 	fabsd	%f0, %f14
 	fcmpd	%fcc3, %f14, %f16
 	fmovduge %fcc3, ZERO, %f16
 	fand	%f0, SIGN_BIT, SIGN_BIT
 	for	%f16, SIGN_BIT, %f16
 	siam	(1 << 2) | 3
 	faddd	%f0, %f16, %f18
 	siam	(1 << 2) | 0
 	fsubd	%f18, %f16, %f18
-	fcmpd	%fcc2, %f18, %f0
+	siam	(0 << 2)
 	movxtod	%o3, %f20
 	fmovdule %fcc2, ZERO, %f20
 	fsubd	%f18, %f20, %f0
 	fabsd	%f0, %f0
 	retl
-	 for	%f0, SIGN_BIT, %f0
+	 for	%f18, SIGN_BIT, %f0
 END (__floor_vis3)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S
@ -1,6 +1,6 @@
 #include <sparc-ifunc.h>
-SPARC_ASM_VIS3_IFUNC(floor)
+SPARC_ASM_VIS3_VIS2_IFUNC(floor)
 weak_alias (__floor, floor)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S
@ -0,0 +1,56 @@
 /* Float floor function, sparc64 vis2 version.
   Copyright (C) 2012-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2012.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 #include <sysdep.h>
 	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
 	   the rounding mode during this routine.
 	   We add 2**23 to the value and then subtract it back out (or
 	   subtract then add, if the initial value was negative).
 	   This will clear out the fractional portion of the value and,
 	   with suitable 'siam' initiated rounding mode settings, round
 	   the final result in the proper direction.  */
 #define TWO_TWENTYTHREE	0x4b000000		/* 2**23 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 /* __floorf_vis2: reads its single-precision operand from %f1 and leaves
    the result in %f0.  Scratch state written here: %o2, %f5, ZERO (%f10),
    SIGN_BIT (%f12), %f14, %f16, %fcc3 and one word of memory at
    [%sp + STACK_BIAS + 128].  */
 ENTRY (__floorf_vis2)
 	/* %o2 = 0x4b000000, the bit pattern of 2**23; ZERO = 0.0 and
 	   SIGN_BIT = -0.0 (only the sign bit set).  */
 	sethi	%hi(TWO_TWENTYTHREE), %o2
 	fzeros	ZERO
 	fnegs	ZERO, SIGN_BIT
 	/* Bounce the constant through memory to get it from the integer
 	   register into %f16, interleaved with computing %f14 = |x|.
 	   NOTE(review): presumably this stack slot is one a leaf routine
 	   may scribble on -- confirm against the ABI.  */
 	st	%o2, [%sp + STACK_BIAS + 128]
 	fabss	%f1, %f14
 	ld	[%sp + STACK_BIAS + 128], %f16
 	/* Compare |x| with 2**23.  On the 'uge' condition (unordered, or
 	   greater-or-equal) replace the constant with 0.0 so that the
 	   add/subtract below leaves such inputs unchanged.  */
 	fcmps	%fcc3, %f14, %f16
 	fmovsuge %fcc3, ZERO, %f16
 	/* SIGN_BIT becomes the sign bit of x; copy that sign onto the
 	   constant so it is added with the same sign as the input.  */
 	fands	%f1, SIGN_BIT, SIGN_BIT
 	fors	%f16, SIGN_BIT, %f16
 	/* Override the rounding mode just for the add.  NOTE(review): bit 2
 	   looks like the override enable and the low two bits the rounding
 	   direction, with 3 presumably meaning toward -infinity -- confirm
 	   against the VIS2 'siam' definition.  */
 	siam	(1 << 2) | 3
 	fadds	%f1, %f16, %f5
 	/* Switch the override to rounding field 0 while the constant is
 	   subtracted back out.  */
 	siam	(1 << 2) | 0
 	fsubs	%f5, %f16, %f5
 	/* Drop the override (bit 2 cleared) before returning.  */
 	siam	(0 << 2)
 	/* The instruction after retl sits in its delay slot: OR the input's
 	   sign bit back into the result while moving it to %f0.  */
 	retl
 	 fors	%f5, SIGN_BIT, %f0
 END (__floorf_vis2)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S
@ -19,55 +19,40 @@
 #include <sysdep.h>
-	/* Since changing the rounding mode is extremely expensive, we
+	/* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
-	   try to round up using a method that is rounding mode
+	   the rounding mode during this routine.
 	   agnostic.
 	   We add then subtract (or subtract than add if the initial
 	   value was negative) 2**23 to the value, then subtract it
 	   back out.
-	   This will clear out the fractional portion of the value.
+	   This will clear out the fractional portion of the value and,
-	   One of two things will happen for non-whole initial values.
+	   with suitable 'siam' initiated rounding mode settings, round
-	   Either the rounding mode will round it up, or it will be
+	   the final result in the proper direction.
 	   rounded down.  If the value started out whole, it will be
 	   equal after the addition and subtraction.  This means we
 	   can accurately detect with one test whether we need to add
 	   another 1.0 to round it up properly.
-	   VIS instructions are used to facilitate the formation of
+	   We also use VIS3 moves to avoid using the stack to transfer
-	   easier constants, and the propagation of the sign bit.  */
+	   values between float and integer registers.  */
 #define TWO_TWENTYTHREE	0x4b000000		/* 2**23 */
 #define ONE_DOT_ZERO	0x3f800000		/* 1.0 */
 #define ZERO		%f10			/* 0.0 */
 #define SIGN_BIT	%f12			/* -0.0 */
 ENTRY (__floorf_vis3)
 	sethi	%hi(TWO_TWENTYTHREE), %o2
 	sethi	%hi(ONE_DOT_ZERO), %o3
 	fzeros	ZERO
 	fnegs	ZERO, SIGN_BIT
-
+	movwtos	%o2, %f16
 	movwtos %o2, %f16
 	fabss	%f1, %f14
 	fcmps	%fcc3, %f14, %f16
 	fmovsuge %fcc3, ZERO, %f16
 	fands	%f1, SIGN_BIT, SIGN_BIT
 	fors	%f16, SIGN_BIT, %f16
 	siam	(1 << 2) | 3
 	fadds	%f1, %f16, %f5
 	siam	(1 << 2) | 0
 	fsubs	%f5, %f16, %f5
-	fcmps	%fcc2, %f5, %f1
+	siam	(0 << 2)
 	movwtos	%o3, %f9
 	fmovsule %fcc2, ZERO, %f9
 	fsubs	%f5, %f9, %f0
 	fabss	%f0, %f0
 	retl
-	 fors	%f0, SIGN_BIT, %f0
+	 fors	%f5, SIGN_BIT, %f0
 END (__floorf_vis3)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S
@ -1,6 +1,6 @@
 #include <sparc-ifunc.h>
-SPARC_ASM_VIS3_IFUNC(floorf)
+SPARC_ASM_VIS3_VIS2_IFUNC(floorf)
 weak_alias (__floorf, floorf)