Remove 32 bit sparc v7 support

The patch is straighforward: - The sparc32 v8 implementations are moved as the generic ones. - A configure test is added to check for either __sparc_v8__ or __sparc_v9__. - The triple names are simplified and sparc implies sparcv8. The idea is to keep support on sparcv8 architectures that does support CAS instructions, such as LEON3/LEON4. Checked on a sparcv9-linux-gnu and sparc64-linux-gnu. Tested-by: Andreas Larsson <andreas@gaisler.com>
2024-11-21 12:30:06 +00:00 · 2019-11-13 12:32:17 +00:00 · 2019-11-13 12:32:17 +00:00 · 5d9b7b9fa7
commit 5d9b7b9fa7
parent bfdb731438
26 changed files with 418 additions and 2708 deletions
--- a/4
+++ b/4
@ -80,6 +80,10 @@ Deprecated and removed features, and other changes affecting compatibility:
 * The obsolete functions ftime has been deprecated and will be removed from
  a future version of glibc.  Application should use clock_gettime instead.

+* The sparc*-*linux-gnu configurations targeting v7 or order architecture
+  are no longer supported.  For v8 only implementations with native CAS
+  instruction are still supported (such as LEON).
+
 Changes to build and runtime requirements:

  [Add changes to build and runtime requirements here]
--- a/scripts/build-many-glibcs.py
+++ b/scripts/build-many-glibcs.py
@ -358,8 +358,10 @@ class Context(object):
        self.add_config(arch='sparc64',
                        os_name='linux-gnu',
                        glibcs=[{},
+                                {'arch': 'sparcv8',
+                                 'ccopts': '-m32 -mlong-double-128 -mcpu=leon3'}],
                                {'arch': 'sparcv9',
-                                 'ccopts': '-m32 -mlong-double-128'}],
+                                 'ccopts': '-m32 -mlong-double-128 -mcpu=v9'}],
                        extra_glibcs=[{'variant': 'disable-multi-arch',
                                       'cfg': ['--disable-multi-arch']},
                                      {'variant': 'disable-multi-arch',
@ -847,11 +849,7 @@ class Context(object):
        # be touched because nothing in a build depends on the files
        # in question.
        for f in ('sysdeps/gnu/errlist.c',
-                  'sysdeps/mach/hurd/bits/errno.h',
-                  'sysdeps/sparc/sparc32/rem.S',
-                  'sysdeps/sparc/sparc32/sdiv.S',
-                  'sysdeps/sparc/sparc32/udiv.S',
-                  'sysdeps/sparc/sparc32/urem.S'):
+                  'sysdeps/mach/hurd/bits/errno.h'):
            to_touch = os.path.join(srcdir, f)
            subprocess.run(['touch', '-c', to_touch], check=True)
        for dirpath, dirnames, filenames in os.walk(srcdir):
--- a/sysdeps/sparc/preconfigure
+++ b/sysdeps/sparc/preconfigure
@ -1,24 +1,10 @@
 # preconfigure fragment for sparc.

 case "$machine" in
-sparc | sparcv[67])
+sparc | sparcv8 | supersparc | hypersparc)
 		base_machine=sparc machine=sparc/sparc32 ;;
-sparcv8 | supersparc | hypersparc)
-		base_machine=sparc machine=sparc/sparc32/sparcv8 ;;
-sparcv8plus | sparcv8plusa | sparcv9)
+sparcv8plus* | sparcv9*)
 		base_machine=sparc machine=sparc/sparc32/sparcv9 ;;
-sparcv8plusb | sparcv9b)
-		base_machine=sparc machine=sparc/sparc32/sparcv9/sparcv9b ;;
-sparcv9v)
-		base_machine=sparc machine=sparc/sparc32/sparcv9/sparcv9v ;;
-sparcv9v2)
-		base_machine=sparc machine=sparc/sparc32/sparcv9/sparcv9v2 ;;
-sparc64)
+sparc64*)
 		base_machine=sparc machine=sparc/sparc64 ;;
-sparc64b)
-		base_machine=sparc machine=sparc/sparc64/sparcv9b ;;
-sparc64v)
-		base_machine=sparc machine=sparc/sparc64/sparcv9v ;;
-sparc64v2)
-		base_machine=sparc machine=sparc/sparc64/sparcv9v2 ;;
 esac
--- a/sysdeps/sparc/sparc32/Makefile
+++ b/sysdeps/sparc/sparc32/Makefile
@ -19,35 +19,8 @@ ifeq ($(subdir),gnulib)
 sysdep_routines = dotmul umul $(divrem) alloca
 endif	# gnulib

-# We distribute these files, even though they are generated,
-# so as to avoid the need for a functioning m4 to build the library.
 divrem := sdiv udiv rem urem

-+divrem-NAME-sdiv := div
-+divrem-NAME-udiv := udiv
-+divrem-NAME-rem := rem
-+divrem-NAME-urem := urem
-+divrem-NAME = $(+divrem-NAME-$(basename $(notdir $@)))
-+divrem-OP-div := div
-+divrem-OP-udiv := div
-+divrem-OP-rem := rem
-+divrem-OP-urem := rem
-+divrem-S-div := true
-+divrem-S-rem := true
-+divrem-S-udiv := false
-+divrem-S-urem := false
-$(divrem:%=$(sysdep_dir)/sparc/sparc32/%.S): $(sysdep_dir)/sparc/sparc32/divrem.m4
-	(echo -n "define(NAME,\`.$(+divrem-NAME)')"; \
-	 echo -n " define(OP,\`$(+divrem-OP-$(+divrem-NAME))')"; \
-	 echo -n " define(S,\`$(+divrem-S-$(+divrem-NAME))')"; \
-	 echo " /* This file is generated from divrem.m4; DO NOT EDIT! */"; \
-	 cat $<) | $(M4) > $@-tmp
-# Make it unwritable so noone will edit it by mistake.
-	-chmod a-w $@-tmp
-	mv -f $@-tmp $@
-
-sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/sparc32/%.S)
-
 # libgcc __divdi3 and __moddi3 uses .udiv and since it is also exported by
 # libc.so linker will create PLTs for the symbol.  To avoid it we strong alias
 # the exported libc one to __wrap_.udiv and use linker option --wrap to make any
--- a/sysdeps/sparc/sparc32/addmul_1.S
+++ b/sysdeps/sparc/sparc32/addmul_1.S
@ -1,146 +1,118 @@
-! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add
-! the result to a second limb vector.
-!
+! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
+! add the result to a second limb vector.
+
 ! Copyright (C) 1992-2019 Free Software Foundation, Inc.
-!
+
 ! This file is part of the GNU MP Library.
-!
+
 ! The GNU MP Library is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Lesser General Public License as published by
 ! the Free Software Foundation; either version 2.1 of the License, or (at your
 ! option) any later version.
-!
+
 ! The GNU MP Library is distributed in the hope that it will be useful, but
 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 ! License for more details.
-!
+
 ! You should have received a copy of the GNU Lesser General Public License
 ! along with the GNU MP Library; see the file COPYING.LIB.  If not,
 ! see <https://www.gnu.org/licenses/>.


 ! INPUT PARAMETERS
-! RES_PTR	o0
-! S1_PTR	o1
-! SIZE		o2
-! S2_LIMB	o3
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3

 #include <sysdep.h>

 ENTRY(__mpn_addmul_1)
-	! Make S1_PTR and RES_PTR point at the end of their blocks
-	! and put (- 4 x SIZE) in index/loop counter.
-	sll	%o2,2,%o2
-	add	%o0,%o2,%o4	! RES_PTR in o4 since o0 is retval
-	add	%o1,%o2,%o1
-	sub	%g0,%o2,%o2
+	ld	[%o1+0],%o4	! 1
+	sll	%o2,4,%g1
+	orcc	%g0,%g0,%g2
+	mov	%o7,%g4			! Save return address register
+	and	%g1,(4-1)<<4,%g1
+1:	call	2f
+	 add	%o7,3f-1b,%g3
+2:	jmp	%g3+%g1
+	 mov	%g4,%o7			! Restore return address register

-	cmp	%o3,0xfff
-	bgu	LOC(large)
+	.align	4
+3:
+LOC(00):
+	add	%o0,-4,%o0
+	b	LOC(loop00)		/* 4, 8, 12, ... */
+	 add	%o1,-4,%o1
+	nop
+LOC(01):
+	b	LOC(loop01)		/* 1, 5, 9, ... */
+	 nop
+	nop
+	nop
+LOC(10):
+	add	%o0,-12,%o0	/* 2, 6, 10, ... */
+	b	LOC(loop10)
+	 add	%o1,4,%o1
+	nop
+LOC(11):
+	add	%o0,-8,%o0	/* 3, 7, 11, ... */
+	b	LOC(loop11)
+	 add	%o1,-8,%o1
 	nop

-	ld	[%o1+%o2],%o5
-	mov	0,%o0
-	b	LOC(0)
-	 add	%o4,-4,%o4
-LOC(loop0):
-	addcc	%o5,%g1,%g1
-	ld	[%o1+%o2],%o5
-	addx	%o0,%g0,%o0
-	st	%g1,[%o4+%o2]
-LOC(0):	wr	%g0,%o3,%y
-	sra	%o5,31,%g2
-	and	%o3,%g2,%g2
-	andcc	%g1,0,%g1
-	mulscc	%g1,%o5,%g1
- 	mulscc	%g1,%o5,%g1
- 	mulscc	%g1,%o5,%g1
- 	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,0,%g1
-	sra	%g1,20,%g4
-	sll	%g1,12,%g1
- 	rd	%y,%g3
-	srl	%g3,20,%g3
-	or	%g1,%g3,%g1
-
-	addcc	%g1,%o0,%g1
-	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
-	addcc	%o2,4,%o2	! loop counter
-	bne	LOC(loop0)
-	 ld	[%o4+%o2],%o5
-
-	addcc	%o5,%g1,%g1
-	addx	%o0,%g0,%o0
-	retl
-	st	%g1,[%o4+%o2]
-
-
-LOC(large):
-	ld	[%o1+%o2],%o5
-	mov	0,%o0
-	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
-	b	LOC(1)
-	 add	%o4,-4,%o4
 LOC(loop):
-	addcc	%o5,%g3,%g3
-	ld	[%o1+%o2],%o5
-	addx	%o0,%g0,%o0
-	st	%g3,[%o4+%o2]
-LOC(1):	wr	%g0,%o5,%y
-	and	%o5,%g4,%g2
-	andcc	%g0,%g0,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%g0,%g1
-	rd	%y,%g3
-	addcc	%g3,%o0,%g3
-	addx	%g2,%g1,%o0
-	addcc	%o2,4,%o2
-	bne	LOC(loop)
-	 ld	[%o4+%o2],%o5
+	addcc	%g3,%g2,%g3	! 1
+	ld	[%o1+4],%o4	! 2
+	rd	%y,%g2		! 1
+	addx	%g0,%g2,%g2
+	ld	[%o0+0],%g1	! 2
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+0]	! 1
+LOC(loop00):
+	umul	%o4,%o3,%g3	! 2
+	ld	[%o0+4],%g1	! 2
+	addxcc	%g3,%g2,%g3	! 2
+	ld	[%o1+8],%o4	! 3
+	rd	%y,%g2		! 2
+	addx	%g0,%g2,%g2
+	nop
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+4]	! 2
+LOC(loop11):
+	umul	%o4,%o3,%g3	! 3
+	addxcc	%g3,%g2,%g3	! 3
+	ld	[%o1+12],%o4	! 4
+	rd	%y,%g2		! 3
+	add	%o1,16,%o1
+	addx	%g0,%g2,%g2
+	ld	[%o0+8],%g1	! 2
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+8]	! 3
+LOC(loop10):
+	umul	%o4,%o3,%g3	! 4
+	addxcc	%g3,%g2,%g3	! 4
+	ld	[%o1+0],%o4	! 1
+	rd	%y,%g2		! 4
+	addx	%g0,%g2,%g2
+	ld	[%o0+12],%g1	! 2
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+12]	! 4
+	add	%o0,16,%o0
+	addx	%g0,%g2,%g2
+LOC(loop01):
+	addcc	%o2,-4,%o2
+	bg	LOC(loop)
+	 umul	%o4,%o3,%g3	! 1

-	addcc	%o5,%g3,%g3
-	addx	%o0,%g0,%o0
+	addcc	%g3,%g2,%g3	! 4
+	rd	%y,%g2		! 4
+	addx	%g0,%g2,%g2
+	ld	[%o0+0],%g1	! 2
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+0]	! 4
 	retl
-	st	%g3,[%o4+%o2]
+	 addx	%g0,%g2,%o0

 END(__mpn_addmul_1)
--- a/sysdeps/sparc/sparc32/configure
+++ b/sysdeps/sparc/sparc32/configure
@ -0,0 +1,162 @@
+# This file is generated from configure.ac by Autoconf.  DO NOT EDIT!
+ # Local configure fragment for sysdeps/sparc/sparc32
+
+# Test if compiler targets at least sparcv8.
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if ${ac_cv_path_GREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$GREP"; then
+  ac_path_GREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in grep ggrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_GREP" || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+  # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'GREP' >> "conftest.nl"
+    "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_GREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_GREP="$ac_path_GREP"
+      ac_path_GREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_GREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_GREP"; then
+    as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if ${ac_cv_path_EGREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+   then ac_cv_path_EGREP="$GREP -E"
+   else
+     if test -z "$EGREP"; then
+  ac_path_EGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in egrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_EGREP" || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+  # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'EGREP' >> "conftest.nl"
+    "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_EGREP="$ac_path_EGREP"
+      ac_path_EGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_EGREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_EGREP"; then
+    as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_EGREP=$EGREP
+fi
+
+   fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for at least sparcv8 support" >&5
+$as_echo_n "checking for at least sparcv8 support... " >&6; }
+if ${libc_cv_sparcv8+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#if defined (__sparc_v8__) || defined (__sparc_v9__)
+                      yes
+                     #endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "yes" >/dev/null 2>&1; then :
+  libc_cv_sparcv8=yes
+else
+  libc_cv_sparcv8=no
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_sparcv8" >&5
+$as_echo "$libc_cv_sparcv8" >&6; }
+if test $libc_cv_sparcv8 = no; then
+  as_fn_error $? "no support for pre-v8 sparc" "$LINENO" 5
+fi
--- a/sysdeps/sparc/sparc32/configure.ac
+++ b/sysdeps/sparc/sparc32/configure.ac
@ -0,0 +1,13 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/sparc/sparc32
+
+# Test if compiler targets at least sparcv8.
+AC_CACHE_CHECK([for at least sparcv8 support],
+  [libc_cv_sparcv8],
+  [AC_EGREP_CPP(yes,[#if defined (__sparc_v8__) || defined (__sparc_v9__)
+                      yes
+                     #endif
+  ], libc_cv_sparcv8=yes, libc_cv_sparcv8=no)])
+if test $libc_cv_sparcv8 = no; then
+  AC_MSG_ERROR([no support for pre-v8 sparc])
+fi
--- a/sysdeps/sparc/sparc32/divrem.m4
+++ b/sysdeps/sparc/sparc32/divrem.m4
@ -1,234 +0,0 @@
-/*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
- */
-
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- *  NAME	name of function to generate
- *  OP		OP=div => %o0 / %o1; OP=rem => %o0 % %o1
- *  S		S=true => signed; S=false => unsigned
- *
- * Algorithm parameters:
- *  N		how many bits per iteration we try to get (4)
- *  WORDSIZE	total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS	number of bits in the top `decade' of a number
- *
- * Important variables:
- *  Q		the partial quotient under development (initially 0)
- *  R		the remainder so far, initially the dividend
- *  ITER	number of main division loop iterations required;
- *		equal to ceil(log2(quotient) / N).  Note that this
- *		is the log base (2^N) of the quotient.
- *  V		the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-
-define(N, `4')dnl
-define(WORDSIZE, `32')dnl
-define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl
-dnl
-define(dividend, `%o0')dnl
-define(divisor, `%o1')dnl
-define(Q, `%o2')dnl
-define(R, `%o3')dnl
-define(ITER, `%o4')dnl
-define(V, `%o5')dnl
-dnl
-dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
-define(T, `%g1')dnl
-define(SC, `%g2')dnl
-ifelse(S, `true', `define(SIGN, `%g3')')dnl
-
-dnl
-dnl This is the recursive definition for developing quotient digits.
-dnl
-dnl Parameters:
-dnl  $1	the current depth, 1 <= $1 <= N
-dnl  $2	the current accumulation of quotient bits
-dnl  N	max depth
-dnl
-dnl We add a new bit to $2 and either recurse or insert the bits in
-dnl the quotient.  R, Q, and V are inputs and outputs as defined above;
-dnl the condition codes are expected to reflect the input R, and are
-dnl modified to reflect the output R.
-dnl
-define(DEVELOP_QUOTIENT_BITS,
-`	! depth $1, accumulated bits $2
-	bl	LOC($1.eval(2**N+$2))
-	srl	V,1,V
-	! remainder is positive
-	subcc	R,V,R
-	ifelse($1, N,
-	`	b	9f
-		add	Q, ($2*2+1), Q
-', `	DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
-LOC($1.eval(2**N+$2)):
-	! remainder is negative
-	addcc	R,V,R
-	ifelse($1, N,
-	`	b	9f
-		add	Q, ($2*2-1), Q
-', `	DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
-ifelse($1, 1, `9:')')dnl
-
-#include <sysdep.h>
-#include <sys/trap.h>
-
-ENTRY(NAME)
-ifelse(S, `true',
-`	! compute sign of result; if neither is negative, no problem
-	orcc	divisor, dividend, %g0	! either negative?
-	bge	2f			! no, go do the divide
-ifelse(OP, `div',
-`	xor	divisor, dividend, SIGN	! compute sign in any case',
-`	mov	dividend, SIGN		! sign of remainder matches dividend')
-	tst	divisor
-	bge	1f
-	tst	dividend
-	! divisor is definitely negative; dividend might also be negative
-	bge	2f			! if dividend not negative...
-	sub	%g0, divisor, divisor	! in any case, make divisor nonneg
-1:	! dividend is negative, divisor is nonnegative
-	sub	%g0, dividend, dividend	! make dividend nonnegative
-2:
-')
-	! Ready to divide.  Compute size of quotient; scale comparand.
-	orcc	divisor, %g0, V
-	bne	1f
-	mov	dividend, R
-
-		! Divide by zero trap.  If it returns, return 0 (about as
-		! wrong as possible, but that is what SunOS does...).
-		ta	ST_DIV0
-		retl
-		clr	%o0
-
-1:
-	cmp	R, V			! if divisor exceeds dividend, done
-	blu	LOC(got_result)		! (and algorithm fails otherwise)
-	clr	Q
-	sethi	%hi(1 << (WORDSIZE - TOPBITS - 1)), T
-	cmp	R, T
-	blu	LOC(not_really_big)
-	clr	ITER
-
-	! `Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-	! as our usual N-at-a-shot divide step will cause overflow and havoc.
-	! The number of bits in the result here is N*ITER+SC, where SC <= N.
-	! Compute ITER in an unorthodox manner: know we need to shift V into
-	! the top decade: so do not even bother to compare to R.'
-	1:
-		cmp	V, T
-		bgeu	3f
-		mov	1, SC
-		sll	V, N, V
-		b	1b
-		add	ITER, 1, ITER
-
-	! Now compute SC.
-	2:	addcc	V, V, V
-		bcc	LOC(not_too_big)
-		add	SC, 1, SC
-
-		! We get here if the divisor overflowed while shifting.
-		! This means that R has the high-order bit set.
-		! Restore V and subtract from R.
-		sll	T, TOPBITS, T	! high order bit
-		srl	V, 1, V		! rest of V
-		add	V, T, V
-		b	LOC(do_single_div)
-		sub	SC, 1, SC
-
-	LOC(not_too_big):
-	3:	cmp	V, R
-		blu	2b
-		nop
-		be	LOC(do_single_div)
-		nop
-	/* NB: these are commented out in the V8-Sparc manual as well */
-	/* (I do not understand this) */
-	! V > R: went too far: back up 1 step
-	!	srl	V, 1, V
-	!	dec	SC
-	! do single-bit divide steps
-	!
-	! We have to be careful here.  We know that R >= V, so we can do the
-	! first divide step without thinking.  BUT, the others are conditional,
-	! and are only done if R >= 0.  Because both R and V may have the high-
-	! order bit set in the first step, just falling into the regular
-	! division loop will mess up the first time around.
-	! So we unroll slightly...
-	LOC(do_single_div):
-		subcc	SC, 1, SC
-		bl	LOC(end_regular_divide)
-		nop
-		sub	R, V, R
-		mov	1, Q
-		b	LOC(end_single_divloop)
-		nop
-	LOC(single_divloop):
-		sll	Q, 1, Q
-		bl	1f
-		srl	V, 1, V
-		! R >= 0
-		sub	R, V, R
-		b	2f
-		add	Q, 1, Q
-	1:	! R < 0
-		add	R, V, R
-		sub	Q, 1, Q
-	2:
-	LOC(end_single_divloop):
-		subcc	SC, 1, SC
-		bge	LOC(single_divloop)
-		tst	R
-		b,a	LOC(end_regular_divide)
-
-LOC(not_really_big):
-1:
-	sll	V, N, V
-	cmp	V, R
-	bleu	1b
-	addcc	ITER, 1, ITER
-	be	LOC(got_result)
-	sub	ITER, 1, ITER
-
-	tst	R	! set up for initial iteration
-LOC(divloop):
-	sll	Q, N, Q
-	DEVELOP_QUOTIENT_BITS(1, 0)
-LOC(end_regular_divide):
-	subcc	ITER, 1, ITER
-	bge	LOC(divloop)
-	tst	R
-	bl,a	LOC(got_result)
-	! non-restoring fixup here (one instruction only!)
-ifelse(OP, `div',
-`	sub	Q, 1, Q
-', `	add	R, divisor, R
-')
-
-LOC(got_result):
-ifelse(S, `true',
-`	! check to see if answer should be < 0
-	tst	SIGN
-	bl,a	1f
-	ifelse(OP, `div', `sub %g0, Q, Q', `sub %g0, R, R')
-1:')
-	retl
-	ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
-
-END(NAME)
-ifelse(OP, `div', ifelse(S, `false', `strong_alias (.udiv, __wrap_.udiv)
-'))dnl
--- a/sysdeps/sparc/sparc32/dotmul.S
+++ b/sysdeps/sparc/sparc32/dotmul.S
@ -1,127 +1,13 @@
 /*
- * Signed multiply, from Appendix E of the Sparc Version 8
- * Architecture Manual.
- */
-
-/*
- * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
- * the 64-bit product).
- *
- * This code optimizes short (less than 13-bit) multiplies.
+ * Sparc v8 has multiply.
 */

 #include <sysdep.h>

-
 ENTRY(.mul)
-	mov	%o0, %y		! multiplier -> Y
-	andncc	%o0, 0xfff, %g0	! test bits 12..31
-	be	LOC(mul_shortway)	! if zero, can do it the short way
-	andcc	%g0, %g0, %o4	! zero the partial product and clear N and V

-	/*
-	 * Long multiply.  32 steps, followed by a final shift step.
-	 */
-	mulscc	%o4, %o1, %o4	! 1
-	mulscc	%o4, %o1, %o4	! 2
-	mulscc	%o4, %o1, %o4	! 3
-	mulscc	%o4, %o1, %o4	! 4
-	mulscc	%o4, %o1, %o4	! 5
-	mulscc	%o4, %o1, %o4	! 6
-	mulscc	%o4, %o1, %o4	! 7
-	mulscc	%o4, %o1, %o4	! 8
-	mulscc	%o4, %o1, %o4	! 9
-	mulscc	%o4, %o1, %o4	! 10
-	mulscc	%o4, %o1, %o4	! 11
-	mulscc	%o4, %o1, %o4	! 12
-	mulscc	%o4, %o1, %o4	! 13
-	mulscc	%o4, %o1, %o4	! 14
-	mulscc	%o4, %o1, %o4	! 15
-	mulscc	%o4, %o1, %o4	! 16
-	mulscc	%o4, %o1, %o4	! 17
-	mulscc	%o4, %o1, %o4	! 18
-	mulscc	%o4, %o1, %o4	! 19
-	mulscc	%o4, %o1, %o4	! 20
-	mulscc	%o4, %o1, %o4	! 21
-	mulscc	%o4, %o1, %o4	! 22
-	mulscc	%o4, %o1, %o4	! 23
-	mulscc	%o4, %o1, %o4	! 24
-	mulscc	%o4, %o1, %o4	! 25
-	mulscc	%o4, %o1, %o4	! 26
-	mulscc	%o4, %o1, %o4	! 27
-	mulscc	%o4, %o1, %o4	! 28
-	mulscc	%o4, %o1, %o4	! 29
-	mulscc	%o4, %o1, %o4	! 30
-	mulscc	%o4, %o1, %o4	! 31
-	mulscc	%o4, %o1, %o4	! 32
-	mulscc	%o4, %g0, %o4	! final shift
-
-	! If %o0 was negative, the result is
-	!	(%o0 * %o1) + (%o1 << 32))
-	! We fix that here.
-
-#if 0
-	tst	%o0
-	bge	1f
-	rd	%y, %o0
-
-	! %o0 was indeed negative; fix upper 32 bits of result by subtracting
-	! %o1 (i.e., return %o4 - %o1 in %o1).
+	smul	%o0, %o1, %o0
 	retl
-	sub	%o4, %o1, %o1
-
-1:
-	retl
-	mov	%o4, %o1
-#else
-	/* Faster code adapted from tege@sics.se's code for umul.S.  */
-	sra	%o0, 31, %o2	! make mask from sign bit
-	and	%o1, %o2, %o2	! %o2 = 0 or %o1, depending on sign of %o0
-	rd	%y, %o0		! get lower half of product
-	retl
-	sub	%o4, %o2, %o1	! subtract compensation
-				!  and put upper half in place
-#endif
-
-LOC(mul_shortway):
-	/*
-	 * Short multiply.  12 steps, followed by a final shift step.
-	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
-	 * but there is no problem with %o0 being negative (unlike above).
-	 */
-	mulscc	%o4, %o1, %o4	! 1
-	mulscc	%o4, %o1, %o4	! 2
-	mulscc	%o4, %o1, %o4	! 3
-	mulscc	%o4, %o1, %o4	! 4
-	mulscc	%o4, %o1, %o4	! 5
-	mulscc	%o4, %o1, %o4	! 6
-	mulscc	%o4, %o1, %o4	! 7
-	mulscc	%o4, %o1, %o4	! 8
-	mulscc	%o4, %o1, %o4	! 9
-	mulscc	%o4, %o1, %o4	! 10
-	mulscc	%o4, %o1, %o4	! 11
-	mulscc	%o4, %o1, %o4	! 12
-	mulscc	%o4, %g0, %o4	! final shift
-
-	/*
-	 *  %o4 has 20 of the bits that should be in the low part of the
-	 * result; %y has the bottom 12 (as %y's top 12).  That is:
-	 *
-	 *	  %o4		    %y
-	 * +----------------+----------------+
-	 * | -12- |   -20-  | -12- |   -20-  |
-	 * +------(---------+------)---------+
-	 *  --hi-- ----low-part----
-	 *
-	 * The upper 12 bits of %o4 should be sign-extended to form the
-	 * high part of the product (i.e., highpart = %o4 >> 20).
-	 */
-
-	rd	%y, %o5
-	sll	%o4, 12, %o0	! shift middle bits left 12
-	srl	%o5, 20, %o5	! shift low bits right 20, zero fill at left
-	or	%o5, %o0, %o0	! construct low part of result
-	retl
-	sra	%o4, 20, %o1	! ... and extract high part of result
+	 rd	%y, %o1

 END(.mul)
--- a/sysdeps/sparc/sparc32/mul_1.S
+++ b/sysdeps/sparc/sparc32/mul_1.S
@ -1,198 +1,102 @@
-! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
-! the result in a second limb vector.
-!
+! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
 ! Copyright (C) 1992-2019 Free Software Foundation, Inc.
-!
+
 ! This file is part of the GNU MP Library.
-!
+
 ! The GNU MP Library is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Lesser General Public License as published by
 ! the Free Software Foundation; either version 2.1 of the License, or (at your
 ! option) any later version.
-!
+
 ! The GNU MP Library is distributed in the hope that it will be useful, but
 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 ! License for more details.
-!
+
 ! You should have received a copy of the GNU Lesser General Public License
 ! along with the GNU MP Library; see the file COPYING.LIB.  If not,
 ! see <https://www.gnu.org/licenses/>.


 ! INPUT PARAMETERS
-! RES_PTR	o0
-! S1_PTR	o1
-! SIZE		o2
-! S2_LIMB	o3
-
-! ADD CODE FOR SMALL MULTIPLIERS!
-!1:	ld
-!	st
-!
-!2:	ld	,a
-!	addxcc	a,a,x
-!	st	x,
-!
-!3_unrolled:
-!	ld	,a
-!	addxcc	a,a,x1		! 2a + cy
-!	addx	%g0,%g0,x2
-!	addcc	a,x1,x		! 3a + c
-!	st	x,
-!
-!	ld	,a
-!	addxcc	a,a,y1
-!	addx	%g0,%g0,y2
-!	addcc	a,y1,x
-!	st	x,
-!
-!4_unrolled:
-!	ld	,a
-!	srl	a,2,x1		! 4a
-!	addxcc	y2,x1,x
-!	sll	a,30,x2
-!	st	x,
-!
-!	ld	,a
-!	srl	a,2,y1
-!	addxcc	x2,y1,y
-!	sll	a,30,y2
-!	st	x,
-!
-!5_unrolled:
-!	ld	,a
-!	srl	a,2,x1		! 4a
-!	addxcc	a,x1,x		! 5a + c
-!	sll	a,30,x2
-!	addx	%g0,x2,x2
-!	st	x,
-!
-!	ld	,a
-!	srl	a,2,y1
-!	addxcc	a,y1,x
-!	sll	a,30,y2
-!	addx	%g0,y2,y2
-!	st	x,
-!
-!8_unrolled:
-!	ld	,a
-!	srl	a,3,x1		! 8a
-!	addxcc	y2,x1,x
-!	sll	a,29,x2
-!	st	x,
-!
-!	ld	,a
-!	srl	a,3,y1
-!	addxcc	x2,y1,y
-!	sll	a,29,y2
-!	st	x,
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3

 #include <sysdep.h>

 ENTRY(__mpn_mul_1)
-	! Make S1_PTR and RES_PTR point at the end of their blocks
-	! and put (- 4 x SIZE) in index/loop counter.
-	sll	%o2,2,%o2
-	add	%o0,%o2,%o4	! RES_PTR in o4 since o0 is retval
-	add	%o1,%o2,%o1
-	sub	%g0,%o2,%o2
+	sll	%o2,4,%g1
+	mov	%o7,%g4			! Save return address register
+	and	%g1,(4-1)<<4,%g1
+1:	call	2f
+	 add	%o7,3f-1b,%g3
+2:	mov	%g4,%o7			! Restore return address register
+	jmp	%g3+%g1
+	 ld	[%o1+0],%o4	! 1

-	cmp	%o3,0xfff
-	bgu	LOC(large)
+	.align	4
+3:
+LOC(00):
+	add	%o0,-4,%o0
+	add	%o1,-4,%o1
+	b	LOC(loop00)		/* 4, 8, 12, ... */
+	 orcc	%g0,%g0,%g2
+LOC(01):
+	b	LOC(loop01)		/* 1, 5, 9, ... */
+	 orcc	%g0,%g0,%g2
 	nop
+	nop
+LOC(10):
+	add	%o0,-12,%o0	/* 2, 6, 10, ... */
+	add	%o1,4,%o1
+	b	LOC(loop10)
+	 orcc	%g0,%g0,%g2
+	nop
+LOC(11):
+	add	%o0,-8,%o0	/* 3, 7, 11, ... */
+	add	%o1,-8,%o1
+	b	LOC(loop11)
+	 orcc	%g0,%g0,%g2

-	ld	[%o1+%o2],%o5
-	mov	0,%o0
-	b	LOC(0)
-	 add	%o4,-4,%o4
-LOC(loop0):
-	st	%g1,[%o4+%o2]
-LOC(0):	wr	%g0,%o3,%y
-	sra	%o5,31,%g2
-	and	%o3,%g2,%g2
-	andcc	%g1,0,%g1
-	mulscc	%g1,%o5,%g1
- 	mulscc	%g1,%o5,%g1
- 	mulscc	%g1,%o5,%g1
- 	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,0,%g1
-	sra	%g1,20,%g4
-	sll	%g1,12,%g1
- 	rd	%y,%g3
-	srl	%g3,20,%g3
-	or	%g1,%g3,%g1
-
-	addcc	%g1,%o0,%g1
-	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
-	addcc	%o2,4,%o2	! loop counter
-	bne,a	LOC(loop0)
-	 ld	[%o1+%o2],%o5
-
-	retl
-	st	%g1,[%o4+%o2]
-
-
-LOC(large):
-	ld	[%o1+%o2],%o5
-	mov	0,%o0
-	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
-	b	LOC(1)
-	 add	%o4,-4,%o4
 LOC(loop):
-	st	%g3,[%o4+%o2]
-LOC(1):	wr	%g0,%o5,%y
-	and	%o5,%g4,%g2	! g2 = S1_LIMB iff S2_LIMB < 0, else 0
-	andcc	%g0,%g0,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%g0,%g1
-	rd	%y,%g3
-	addcc	%g3,%o0,%g3
-	addx	%g2,%g1,%o0	! add sign-compensation and cy to hi limb
-	addcc	%o2,4,%o2	! loop counter
-	bne,a	LOC(loop)
-	 ld	[%o1+%o2],%o5
+	addcc	%g3,%g2,%g3	! 1
+	ld	[%o1+4],%o4	! 2
+	st	%g3,[%o0+0]	! 1
+	rd	%y,%g2		! 1
+LOC(loop00):
+	umul	%o4,%o3,%g3	! 2
+	addxcc	%g3,%g2,%g3	! 2
+	ld	[%o1+8],%o4	! 3
+	st	%g3,[%o0+4]	! 2
+	rd	%y,%g2		! 2
+LOC(loop11):
+	umul	%o4,%o3,%g3	! 3
+	addxcc	%g3,%g2,%g3	! 3
+	ld	[%o1+12],%o4	! 4
+	add	%o1,16,%o1
+	st	%g3,[%o0+8]	! 3
+	rd	%y,%g2		! 3
+LOC(loop10):
+	umul	%o4,%o3,%g3	! 4
+	addxcc	%g3,%g2,%g3	! 4
+	ld	[%o1+0],%o4	! 1
+	st	%g3,[%o0+12]	! 4
+	add	%o0,16,%o0
+	rd	%y,%g2		! 4
+	addx	%g0,%g2,%g2
+LOC(loop01):
+	addcc	%o2,-4,%o2
+	bg	LOC(loop)
+	 umul	%o4,%o3,%g3	! 1

+	addcc	%g3,%g2,%g3	! 4
+	st	%g3,[%o0+0]	! 4
+	rd	%y,%g2		! 4
 	retl
-	st	%g3,[%o4+%o2]
+	 addx	%g0,%g2,%o0

 END(__mpn_mul_1)
--- a/sysdeps/sparc/sparc32/rem.S
+++ b/sysdeps/sparc/sparc32/rem.S
@ -1,363 +1,21 @@
-   /* This file is generated from divrem.m4; DO NOT EDIT! */
 /*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
+ * Sparc v8 has divide.
 */

-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- *  .rem	name of function to generate
- *  rem		rem=div => %o0 / %o1; rem=rem => %o0 % %o1
- *  true		true=true => signed; true=false => unsigned
- *
- * Algorithm parameters:
- *  N		how many bits per iteration we try to get (4)
- *  WORDSIZE	total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS	number of bits in the top decade of a number
- *
- * Important variables:
- *  Q		the partial quotient under development (initially 0)
- *  R		the remainder so far, initially the dividend
- *  ITER	number of main division loop iterations required;
- *		equal to ceil(log2(quotient) / N).  Note that this
- *		is the log base (2^N) of the quotient.
- *  V		the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-
-
-
 #include <sysdep.h>
-#include <sys/trap.h>

 ENTRY(.rem)
-	! compute sign of result; if neither is negative, no problem
-	orcc	%o1, %o0, %g0	! either negative?
-	bge	2f			! no, go do the divide
-	mov	%o0, %g3		! sign of remainder matches %o0
-	tst	%o1
-	bge	1f
-	tst	%o0
-	! %o1 is definitely negative; %o0 might also be negative
-	bge	2f			! if %o0 not negative...
-	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
-1:	! %o0 is negative, %o1 is nonnegative
-	sub	%g0, %o0, %o0	! make %o0 nonnegative
-2:

-	! Ready to divide.  Compute size of quotient; scale comparand.
-	orcc	%o1, %g0, %o5
-	bne	1f
-	mov	%o0, %o3
-
-		! Divide by zero trap.  If it returns, return 0 (about as
-		! wrong as possible, but that is what SunOS does...).
-		ta	ST_DIV0
-		retl
-		clr	%o0
-
-1:
-	cmp	%o3, %o5			! if %o1 exceeds %o0, done
-	blu	LOC(got_result)		! (and algorithm fails otherwise)
-	clr	%o2
-	sethi	%hi(1 << (32 - 4 - 1)), %g1
-	cmp	%o3, %g1
-	blu	LOC(not_really_big)
-	clr	%o4
-
-	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-	! as our usual N-at-a-shot divide step will cause overflow and havoc.
-	! The number of bits in the result here is N*ITER+SC, where SC <= N.
-	! Compute ITER in an unorthodox manner: know we need to shift V into
-	! the top decade: so do not even bother to compare to R.
-	1:
-		cmp	%o5, %g1
-		bgeu	3f
-		mov	1, %g2
-		sll	%o5, 4, %o5
-		b	1b
-		add	%o4, 1, %o4
-
-	! Now compute %g2.
-	2:	addcc	%o5, %o5, %o5
-		bcc	LOC(not_too_big)
-		add	%g2, 1, %g2
-
-		! We get here if the %o1 overflowed while shifting.
-		! This means that %o3 has the high-order bit set.
-		! Restore %o5 and subtract from %o3.
-		sll	%g1, 4, %g1	! high order bit
-		srl	%o5, 1, %o5		! rest of %o5
-		add	%o5, %g1, %o5
-		b	LOC(do_single_div)
-		sub	%g2, 1, %g2
-
-	LOC(not_too_big):
-	3:	cmp	%o5, %o3
-		blu	2b
-		nop
-		be	LOC(do_single_div)
-		nop
-	/* NB: these are commented out in the V8-Sparc manual as well */
-	/* (I do not understand this) */
-	! %o5 > %o3: went too far: back up 1 step
-	!	srl	%o5, 1, %o5
-	!	dec	%g2
-	! do single-bit divide steps
-	!
-	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
-	! first divide step without thinking.  BUT, the others are conditional,
-	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
-	! order bit set in the first step, just falling into the regular
-	! division loop will mess up the first time around.
-	! So we unroll slightly...
-	LOC(do_single_div):
-		subcc	%g2, 1, %g2
-		bl	LOC(end_regular_divide)
-		nop
-		sub	%o3, %o5, %o3
-		mov	1, %o2
-		b	LOC(end_single_divloop)
-		nop
-	LOC(single_divloop):
-		sll	%o2, 1, %o2
-		bl	1f
-		srl	%o5, 1, %o5
-		! %o3 >= 0
-		sub	%o3, %o5, %o3
-		b	2f
-		add	%o2, 1, %o2
-	1:	! %o3 < 0
-		add	%o3, %o5, %o3
-		sub	%o2, 1, %o2
-	2:
-	LOC(end_single_divloop):
-		subcc	%g2, 1, %g2
-		bge	LOC(single_divloop)
-		tst	%o3
-		b,a	LOC(end_regular_divide)
-
-LOC(not_really_big):
-1:
-	sll	%o5, 4, %o5
-	cmp	%o5, %o3
-	bleu	1b
-	addcc	%o4, 1, %o4
-	be	LOC(got_result)
-	sub	%o4, 1, %o4
-
-	tst	%o3	! set up for initial iteration
-LOC(divloop):
-	sll	%o2, 4, %o2
-		! depth 1, accumulated bits 0
-	bl	LOC(1.16)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 2, accumulated bits 1
-	bl	LOC(2.17)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 3
-	bl	LOC(3.19)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 7
-	bl	LOC(4.23)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (7*2+1), %o2
-
-LOC(4.23):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (7*2-1), %o2
-
-
-LOC(3.19):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 5
-	bl	LOC(4.21)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (5*2+1), %o2
-
-LOC(4.21):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (5*2-1), %o2
-
-
-
-LOC(2.17):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 1
-	bl	LOC(3.17)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 3
-	bl	LOC(4.19)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (3*2+1), %o2
-
-LOC(4.19):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (3*2-1), %o2
-
-
-LOC(3.17):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 1
-	bl	LOC(4.17)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (1*2+1), %o2
-
-LOC(4.17):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (1*2-1), %o2
-
-
-
-
-LOC(1.16):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 2, accumulated bits -1
-	bl	LOC(2.15)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -1
-	bl	LOC(3.15)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -1
-	bl	LOC(4.15)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-1*2+1), %o2
-
-LOC(4.15):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-1*2-1), %o2
-
-
-LOC(3.15):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -3
-	bl	LOC(4.13)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-3*2+1), %o2
-
-LOC(4.13):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-3*2-1), %o2
-
-
-
-LOC(2.15):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -3
-	bl	LOC(3.13)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -5
-	bl	LOC(4.11)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-5*2+1), %o2
-
-LOC(4.11):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-5*2-1), %o2
-
-
-LOC(3.13):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -7
-	bl	LOC(4.9)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-7*2+1), %o2
-
-LOC(4.9):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-7*2-1), %o2
-
-
-
-
-9:
-LOC(end_regular_divide):
-	subcc	%o4, 1, %o4
-	bge	LOC(divloop)
-	tst	%o3
-	bl,a	LOC(got_result)
-	! non-restoring fixup here (one instruction only!)
-	add	%o3, %o1, %o3
-
-
-LOC(got_result):
-	! check to see if answer should be < 0
-	tst	%g3
-	bl,a	1f
-	sub %g0, %o3, %o3
-1:
+	sra	%o0, 31, %o2
+	wr	%o2, 0, %y
+	nop
+	nop
+	nop
+	sdivcc	%o0, %o1, %o2
+	bvs,a	1f
+	 xnor	%o2, %g0, %o2
+1:	smul	%o2, %o1, %o2
 	retl
-	mov %o3, %o0
+	 sub	%o0, %o2, %o0

 END(.rem)
--- a/sysdeps/sparc/sparc32/sdiv.S
+++ b/sysdeps/sparc/sparc32/sdiv.S
@ -1,363 +1,20 @@
-   /* This file is generated from divrem.m4; DO NOT EDIT! */
 /*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
+ * Sparc v8 has divide.
 */

-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- *  .div	name of function to generate
- *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
- *  true		true=true => signed; true=false => unsigned
- *
- * Algorithm parameters:
- *  N		how many bits per iteration we try to get (4)
- *  WORDSIZE	total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS	number of bits in the top decade of a number
- *
- * Important variables:
- *  Q		the partial quotient under development (initially 0)
- *  R		the remainder so far, initially the dividend
- *  ITER	number of main division loop iterations required;
- *		equal to ceil(log2(quotient) / N).  Note that this
- *		is the log base (2^N) of the quotient.
- *  V		the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-
-
-
 #include <sysdep.h>
-#include <sys/trap.h>

 ENTRY(.div)
-	! compute sign of result; if neither is negative, no problem
-	orcc	%o1, %o0, %g0	! either negative?
-	bge	2f			! no, go do the divide
-	xor	%o1, %o0, %g3	! compute sign in any case
-	tst	%o1
-	bge	1f
-	tst	%o0
-	! %o1 is definitely negative; %o0 might also be negative
-	bge	2f			! if %o0 not negative...
-	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
-1:	! %o0 is negative, %o1 is nonnegative
-	sub	%g0, %o0, %o0	! make %o0 nonnegative
-2:

-	! Ready to divide.  Compute size of quotient; scale comparand.
-	orcc	%o1, %g0, %o5
-	bne	1f
-	mov	%o0, %o3
-
-		! Divide by zero trap.  If it returns, return 0 (about as
-		! wrong as possible, but that is what SunOS does...).
-		ta	ST_DIV0
-		retl
-		clr	%o0
-
-1:
-	cmp	%o3, %o5			! if %o1 exceeds %o0, done
-	blu	LOC(got_result)		! (and algorithm fails otherwise)
-	clr	%o2
-	sethi	%hi(1 << (32 - 4 - 1)), %g1
-	cmp	%o3, %g1
-	blu	LOC(not_really_big)
-	clr	%o4
-
-	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-	! as our usual N-at-a-shot divide step will cause overflow and havoc.
-	! The number of bits in the result here is N*ITER+SC, where SC <= N.
-	! Compute ITER in an unorthodox manner: know we need to shift V into
-	! the top decade: so do not even bother to compare to R.
-	1:
-		cmp	%o5, %g1
-		bgeu	3f
-		mov	1, %g2
-		sll	%o5, 4, %o5
-		b	1b
-		add	%o4, 1, %o4
-
-	! Now compute %g2.
-	2:	addcc	%o5, %o5, %o5
-		bcc	LOC(not_too_big)
-		add	%g2, 1, %g2
-
-		! We get here if the %o1 overflowed while shifting.
-		! This means that %o3 has the high-order bit set.
-		! Restore %o5 and subtract from %o3.
-		sll	%g1, 4, %g1	! high order bit
-		srl	%o5, 1, %o5		! rest of %o5
-		add	%o5, %g1, %o5
-		b	LOC(do_single_div)
-		sub	%g2, 1, %g2
-
-	LOC(not_too_big):
-	3:	cmp	%o5, %o3
-		blu	2b
-		nop
-		be	LOC(do_single_div)
-		nop
-	/* NB: these are commented out in the V8-Sparc manual as well */
-	/* (I do not understand this) */
-	! %o5 > %o3: went too far: back up 1 step
-	!	srl	%o5, 1, %o5
-	!	dec	%g2
-	! do single-bit divide steps
-	!
-	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
-	! first divide step without thinking.  BUT, the others are conditional,
-	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
-	! order bit set in the first step, just falling into the regular
-	! division loop will mess up the first time around.
-	! So we unroll slightly...
-	LOC(do_single_div):
-		subcc	%g2, 1, %g2
-		bl	LOC(end_regular_divide)
-		nop
-		sub	%o3, %o5, %o3
-		mov	1, %o2
-		b	LOC(end_single_divloop)
-		nop
-	LOC(single_divloop):
-		sll	%o2, 1, %o2
-		bl	1f
-		srl	%o5, 1, %o5
-		! %o3 >= 0
-		sub	%o3, %o5, %o3
-		b	2f
-		add	%o2, 1, %o2
-	1:	! %o3 < 0
-		add	%o3, %o5, %o3
-		sub	%o2, 1, %o2
-	2:
-	LOC(end_single_divloop):
-		subcc	%g2, 1, %g2
-		bge	LOC(single_divloop)
-		tst	%o3
-		b,a	LOC(end_regular_divide)
-
-LOC(not_really_big):
-1:
-	sll	%o5, 4, %o5
-	cmp	%o5, %o3
-	bleu	1b
-	addcc	%o4, 1, %o4
-	be	LOC(got_result)
-	sub	%o4, 1, %o4
-
-	tst	%o3	! set up for initial iteration
-LOC(divloop):
-	sll	%o2, 4, %o2
-		! depth 1, accumulated bits 0
-	bl	LOC(1.16)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 2, accumulated bits 1
-	bl	LOC(2.17)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 3
-	bl	LOC(3.19)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 7
-	bl	LOC(4.23)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (7*2+1), %o2
-
-LOC(4.23):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (7*2-1), %o2
-
-
-LOC(3.19):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 5
-	bl	LOC(4.21)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (5*2+1), %o2
-
-LOC(4.21):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (5*2-1), %o2
-
-
-
-LOC(2.17):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 1
-	bl	LOC(3.17)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 3
-	bl	LOC(4.19)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (3*2+1), %o2
-
-LOC(4.19):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (3*2-1), %o2
-
-
-LOC(3.17):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 1
-	bl	LOC(4.17)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (1*2+1), %o2
-
-LOC(4.17):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (1*2-1), %o2
-
-
-
-
-LOC(1.16):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 2, accumulated bits -1
-	bl	LOC(2.15)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -1
-	bl	LOC(3.15)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -1
-	bl	LOC(4.15)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-1*2+1), %o2
-
-LOC(4.15):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-1*2-1), %o2
-
-
-LOC(3.15):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -3
-	bl	LOC(4.13)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-3*2+1), %o2
-
-LOC(4.13):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-3*2-1), %o2
-
-
-
-LOC(2.15):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -3
-	bl	LOC(3.13)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -5
-	bl	LOC(4.11)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-5*2+1), %o2
-
-LOC(4.11):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-5*2-1), %o2
-
-
-LOC(3.13):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -7
-	bl	LOC(4.9)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-7*2+1), %o2
-
-LOC(4.9):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-7*2-1), %o2
-
-
-
-
-9:
-LOC(end_regular_divide):
-	subcc	%o4, 1, %o4
-	bge	LOC(divloop)
-	tst	%o3
-	bl,a	LOC(got_result)
-	! non-restoring fixup here (one instruction only!)
-	sub	%o2, 1, %o2
-
-
-LOC(got_result):
-	! check to see if answer should be < 0
-	tst	%g3
-	bl,a	1f
-	sub %g0, %o2, %o2
-1:
-	retl
-	mov %o2, %o0
+	sra	%o0, 31, %o2
+	wr	%o2, 0, %y
+	nop
+	nop
+	nop
+	sdivcc	%o0, %o1, %o0
+	bvs,a	1f
+	 xnor	%o0, %g0, %o0
+1:	retl
+	 nop

 END(.div)
--- a/sysdeps/sparc/sparc32/sparcv8/Makefile
+++ b/sysdeps/sparc/sparc32/sparcv8/Makefile
@ -1 +0,0 @@
-sysdep-CFLAGS += -mcpu=v8
--- a/sysdeps/sparc/sparc32/sparcv8/addmul_1.S
+++ b/sysdeps/sparc/sparc32/sparcv8/addmul_1.S
@ -1,118 +0,0 @@
-! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
-! add the result to a second limb vector.
-
-! Copyright (C) 1992-2019 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-! License for more details.
-
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library; see the file COPYING.LIB.  If not,
-! see <https://www.gnu.org/licenses/>.
-
-
-! INPUT PARAMETERS
-! res_ptr	o0
-! s1_ptr	o1
-! size		o2
-! s2_limb	o3
-
-#include <sysdep.h>
-
-ENTRY(__mpn_addmul_1)
-	ld	[%o1+0],%o4	! 1
-	sll	%o2,4,%g1
-	orcc	%g0,%g0,%g2
-	mov	%o7,%g4			! Save return address register
-	and	%g1,(4-1)<<4,%g1
-1:	call	2f
-	 add	%o7,3f-1b,%g3
-2:	jmp	%g3+%g1
-	 mov	%g4,%o7			! Restore return address register
-
-	.align	4
-3:
-LOC(00):
-	add	%o0,-4,%o0
-	b	LOC(loop00)		/* 4, 8, 12, ... */
-	 add	%o1,-4,%o1
-	nop
-LOC(01):
-	b	LOC(loop01)		/* 1, 5, 9, ... */
-	 nop
-	nop
-	nop
-LOC(10):
-	add	%o0,-12,%o0	/* 2, 6, 10, ... */
-	b	LOC(loop10)
-	 add	%o1,4,%o1
-	nop
-LOC(11):
-	add	%o0,-8,%o0	/* 3, 7, 11, ... */
-	b	LOC(loop11)
-	 add	%o1,-8,%o1
-	nop
-
-LOC(loop):
-	addcc	%g3,%g2,%g3	! 1
-	ld	[%o1+4],%o4	! 2
-	rd	%y,%g2		! 1
-	addx	%g0,%g2,%g2
-	ld	[%o0+0],%g1	! 2
-	addcc	%g1,%g3,%g3
-	st	%g3,[%o0+0]	! 1
-LOC(loop00):
-	umul	%o4,%o3,%g3	! 2
-	ld	[%o0+4],%g1	! 2
-	addxcc	%g3,%g2,%g3	! 2
-	ld	[%o1+8],%o4	! 3
-	rd	%y,%g2		! 2
-	addx	%g0,%g2,%g2
-	nop
-	addcc	%g1,%g3,%g3
-	st	%g3,[%o0+4]	! 2
-LOC(loop11):
-	umul	%o4,%o3,%g3	! 3
-	addxcc	%g3,%g2,%g3	! 3
-	ld	[%o1+12],%o4	! 4
-	rd	%y,%g2		! 3
-	add	%o1,16,%o1
-	addx	%g0,%g2,%g2
-	ld	[%o0+8],%g1	! 2
-	addcc	%g1,%g3,%g3
-	st	%g3,[%o0+8]	! 3
-LOC(loop10):
-	umul	%o4,%o3,%g3	! 4
-	addxcc	%g3,%g2,%g3	! 4
-	ld	[%o1+0],%o4	! 1
-	rd	%y,%g2		! 4
-	addx	%g0,%g2,%g2
-	ld	[%o0+12],%g1	! 2
-	addcc	%g1,%g3,%g3
-	st	%g3,[%o0+12]	! 4
-	add	%o0,16,%o0
-	addx	%g0,%g2,%g2
-LOC(loop01):
-	addcc	%o2,-4,%o2
-	bg	LOC(loop)
-	 umul	%o4,%o3,%g3	! 1
-
-	addcc	%g3,%g2,%g3	! 4
-	rd	%y,%g2		! 4
-	addx	%g0,%g2,%g2
-	ld	[%o0+0],%g1	! 2
-	addcc	%g1,%g3,%g3
-	st	%g3,[%o0+0]	! 4
-	retl
-	 addx	%g0,%g2,%o0
-
-END(__mpn_addmul_1)
--- a/sysdeps/sparc/sparc32/sparcv8/dotmul.S
+++ b/sysdeps/sparc/sparc32/sparcv8/dotmul.S
@ -1,13 +0,0 @@
-/*
- * Sparc v8 has multiply.
- */
-
-#include <sysdep.h>
-
-ENTRY(.mul)
-
-	smul	%o0, %o1, %o0
-	retl
-	 rd	%y, %o1
-
-END(.mul)
--- a/sysdeps/sparc/sparc32/sparcv8/mul_1.S
+++ b/sysdeps/sparc/sparc32/sparcv8/mul_1.S
@ -1,102 +0,0 @@
-! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
-! store the product in a second limb vector.
-
-! Copyright (C) 1992-2019 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-! License for more details.
-
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library; see the file COPYING.LIB.  If not,
-! see <https://www.gnu.org/licenses/>.
-
-
-! INPUT PARAMETERS
-! res_ptr	o0
-! s1_ptr	o1
-! size		o2
-! s2_limb	o3
-
-#include <sysdep.h>
-
-ENTRY(__mpn_mul_1)
-	sll	%o2,4,%g1
-	mov	%o7,%g4			! Save return address register
-	and	%g1,(4-1)<<4,%g1
-1:	call	2f
-	 add	%o7,3f-1b,%g3
-2:	mov	%g4,%o7			! Restore return address register
-	jmp	%g3+%g1
-	 ld	[%o1+0],%o4	! 1
-
-	.align	4
-3:
-LOC(00):
-	add	%o0,-4,%o0
-	add	%o1,-4,%o1
-	b	LOC(loop00)		/* 4, 8, 12, ... */
-	 orcc	%g0,%g0,%g2
-LOC(01):
-	b	LOC(loop01)		/* 1, 5, 9, ... */
-	 orcc	%g0,%g0,%g2
-	nop
-	nop
-LOC(10):
-	add	%o0,-12,%o0	/* 2, 6, 10, ... */
-	add	%o1,4,%o1
-	b	LOC(loop10)
-	 orcc	%g0,%g0,%g2
-	nop
-LOC(11):
-	add	%o0,-8,%o0	/* 3, 7, 11, ... */
-	add	%o1,-8,%o1
-	b	LOC(loop11)
-	 orcc	%g0,%g0,%g2
-
-LOC(loop):
-	addcc	%g3,%g2,%g3	! 1
-	ld	[%o1+4],%o4	! 2
-	st	%g3,[%o0+0]	! 1
-	rd	%y,%g2		! 1
-LOC(loop00):
-	umul	%o4,%o3,%g3	! 2
-	addxcc	%g3,%g2,%g3	! 2
-	ld	[%o1+8],%o4	! 3
-	st	%g3,[%o0+4]	! 2
-	rd	%y,%g2		! 2
-LOC(loop11):
-	umul	%o4,%o3,%g3	! 3
-	addxcc	%g3,%g2,%g3	! 3
-	ld	[%o1+12],%o4	! 4
-	add	%o1,16,%o1
-	st	%g3,[%o0+8]	! 3
-	rd	%y,%g2		! 3
-LOC(loop10):
-	umul	%o4,%o3,%g3	! 4
-	addxcc	%g3,%g2,%g3	! 4
-	ld	[%o1+0],%o4	! 1
-	st	%g3,[%o0+12]	! 4
-	add	%o0,16,%o0
-	rd	%y,%g2		! 4
-	addx	%g0,%g2,%g2
-LOC(loop01):
-	addcc	%o2,-4,%o2
-	bg	LOC(loop)
-	 umul	%o4,%o3,%g3	! 1
-
-	addcc	%g3,%g2,%g3	! 4
-	st	%g3,[%o0+0]	! 4
-	rd	%y,%g2		! 4
-	retl
-	 addx	%g0,%g2,%o0
-
-END(__mpn_mul_1)
--- a/sysdeps/sparc/sparc32/sparcv8/rem.S
+++ b/sysdeps/sparc/sparc32/sparcv8/rem.S
@ -1,21 +0,0 @@
-/*
- * Sparc v8 has divide.
- */
-
-#include <sysdep.h>
-
-ENTRY(.rem)
-
-	sra	%o0, 31, %o2
-	wr	%o2, 0, %y
-	nop
-	nop
-	nop
-	sdivcc	%o0, %o1, %o2
-	bvs,a	1f
-	 xnor	%o2, %g0, %o2
-1:	smul	%o2, %o1, %o2
-	retl
-	 sub	%o0, %o2, %o0
-
-END(.rem)
--- a/sysdeps/sparc/sparc32/sparcv8/sdiv.S
+++ b/sysdeps/sparc/sparc32/sparcv8/sdiv.S
@ -1,20 +0,0 @@
-/*
- * Sparc v8 has divide.
- */
-
-#include <sysdep.h>
-
-ENTRY(.div)
-
-	sra	%o0, 31, %o2
-	wr	%o2, 0, %y
-	nop
-	nop
-	nop
-	sdivcc	%o0, %o1, %o0
-	bvs,a	1f
-	 xnor	%o0, %g0, %o0
-1:	retl
-	 nop
-
-END(.div)
--- a/sysdeps/sparc/sparc32/sparcv8/submul_1.S
+++ b/sysdeps/sparc/sparc32/sparcv8/submul_1.S
@ -1,57 +0,0 @@
-! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and
-! subtract the result from a second limb vector.
-
-! Copyright (C) 1992-2019 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-! License for more details.
-
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library; see the file COPYING.LIB.  If not,
-! see <https://www.gnu.org/licenses/>.
-
-
-! INPUT PARAMETERS
-! res_ptr	o0
-! s1_ptr	o1
-! size		o2
-! s2_limb	o3
-
-#include <sysdep.h>
-
-ENTRY(__mpn_submul_1)
-	sub	%g0,%o2,%o2		! negate ...
-	sll	%o2,2,%o2		! ... and scale size
-	sub	%o1,%o2,%o1		! o1 is offset s1_ptr
-	sub	%o0,%o2,%g1		! g1 is offset res_ptr
-
-	mov	0,%o0			! clear cy_limb
-
-LOC(loop):
-	ld	[%o1+%o2],%o4
-	ld	[%g1+%o2],%g2
-	umul	%o4,%o3,%o5
-	rd	%y,%g3
-	addcc	%o5,%o0,%o5
-	addx	%g3,0,%o0
-	subcc	%g2,%o5,%g2
-	addx	%o0,0,%o0
-	st	%g2,[%g1+%o2]
-
-	addcc	%o2,4,%o2
-	bne	LOC(loop)
-	 nop
-
-	retl
-	 nop
-
-END(__mpn_submul_1)
--- a/sysdeps/sparc/sparc32/sparcv8/udiv.S
+++ b/sysdeps/sparc/sparc32/sparcv8/udiv.S
@ -1,16 +0,0 @@
-/*
- * Sparc v8 has divide.
- */
-
-#include <sysdep.h>
-
-ENTRY(.udiv)
-
-	wr	%g0, 0, %y
-	nop
-	nop
-	retl
-	 udiv	%o0, %o1, %o0
-
-END(.udiv)
-strong_alias (.udiv, __wrap_.udiv)
--- a/sysdeps/sparc/sparc32/sparcv8/umul.S
+++ b/sysdeps/sparc/sparc32/sparcv8/umul.S
@ -1,13 +0,0 @@
-/*
- * Sparc v8 has multiply.
- */
-
-#include <sysdep.h>
-
-ENTRY(.umul)
-
-	umul	%o0, %o1, %o0
-	retl
-	 rd	%y, %o1
-
-END(.umul)
--- a/sysdeps/sparc/sparc32/sparcv8/urem.S
+++ b/sysdeps/sparc/sparc32/sparcv8/urem.S
@ -1,18 +0,0 @@
-/*
- * Sparc v8 has divide.
- */
-
-#include <sysdep.h>
-
-ENTRY(.urem)
-
-	wr	%g0, 0, %y
-	nop
-	nop
-	nop
-	udiv	%o0, %o1, %o2
-	umul	%o2, %o1, %o2
-	retl
-	 sub	%o0, %o2, %o0
-
-END(.urem)
--- a/sysdeps/sparc/sparc32/submul_1.S
+++ b/sysdeps/sparc/sparc32/submul_1.S
@ -1,146 +1,57 @@
-! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
-! the result from a second limb vector.
-!
+! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and
+! subtract the result from a second limb vector.
+
 ! Copyright (C) 1992-2019 Free Software Foundation, Inc.
-!
+
 ! This file is part of the GNU MP Library.
-!
+
 ! The GNU MP Library is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Lesser General Public License as published by
 ! the Free Software Foundation; either version 2.1 of the License, or (at your
 ! option) any later version.
-!
+
 ! The GNU MP Library is distributed in the hope that it will be useful, but
 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 ! License for more details.
-!
+
 ! You should have received a copy of the GNU Lesser General Public License
 ! along with the GNU MP Library; see the file COPYING.LIB.  If not,
 ! see <https://www.gnu.org/licenses/>.


 ! INPUT PARAMETERS
-! RES_PTR	o0
-! S1_PTR	o1
-! SIZE		o2
-! S2_LIMB	o3
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3

 #include <sysdep.h>

 ENTRY(__mpn_submul_1)
-	! Make S1_PTR and RES_PTR point at the end of their blocks
-	! and put (- 4 x SIZE) in index/loop counter.
-	sll	%o2,2,%o2
-	add	%o0,%o2,%o4	! RES_PTR in o4 since o0 is retval
-	add	%o1,%o2,%o1
-	sub	%g0,%o2,%o2
+	sub	%g0,%o2,%o2		! negate ...
+	sll	%o2,2,%o2		! ... and scale size
+	sub	%o1,%o2,%o1		! o1 is offset s1_ptr
+	sub	%o0,%o2,%g1		! g1 is offset res_ptr

-	cmp	%o3,0xfff
-	bgu	LOC(large)
-	nop
+	mov	0,%o0			! clear cy_limb

-	ld	[%o1+%o2],%o5
-	mov	0,%o0
-	b	LOC(0)
-	 add	%o4,-4,%o4
-LOC(loop0):
-	subcc	%o5,%g1,%g1
-	ld	[%o1+%o2],%o5
-	addx	%o0,%g0,%o0
-	st	%g1,[%o4+%o2]
-LOC(0):	wr	%g0,%o3,%y
-	sra	%o5,31,%g2
-	and	%o3,%g2,%g2
-	andcc	%g1,0,%g1
-	mulscc	%g1,%o5,%g1
- 	mulscc	%g1,%o5,%g1
- 	mulscc	%g1,%o5,%g1
- 	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,%o5,%g1
-	mulscc	%g1,0,%g1
-	sra	%g1,20,%g4
-	sll	%g1,12,%g1
- 	rd	%y,%g3
-	srl	%g3,20,%g3
-	or	%g1,%g3,%g1
-
-	addcc	%g1,%o0,%g1
-	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
-	addcc	%o2,4,%o2	! loop counter
-	bne	LOC(loop0)
-	 ld	[%o4+%o2],%o5
-
-	subcc	%o5,%g1,%g1
-	addx	%o0,%g0,%o0
-	retl
-	st	%g1,[%o4+%o2]
-
-
-LOC(large):
-	ld	[%o1+%o2],%o5
-	mov	0,%o0
-	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
-	b	LOC(1)
-	 add	%o4,-4,%o4
 LOC(loop):
-	subcc	%o5,%g3,%g3
-	ld	[%o1+%o2],%o5
-	addx	%o0,%g0,%o0
-	st	%g3,[%o4+%o2]
-LOC(1):	wr	%g0,%o5,%y
-	and	%o5,%g4,%g2
-	andcc	%g0,%g0,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%o3,%g1
-	mulscc	%g1,%g0,%g1
+	ld	[%o1+%o2],%o4
+	ld	[%g1+%o2],%g2
+	umul	%o4,%o3,%o5
 	rd	%y,%g3
-	addcc	%g3,%o0,%g3
-	addx	%g2,%g1,%o0
+	addcc	%o5,%o0,%o5
+	addx	%g3,0,%o0
+	subcc	%g2,%o5,%g2
+	addx	%o0,0,%o0
+	st	%g2,[%g1+%o2]
+
 	addcc	%o2,4,%o2
 	bne	LOC(loop)
-	 ld	[%o4+%o2],%o5
+	 nop

-	subcc	%o5,%g3,%g3
-	addx	%o0,%g0,%o0
 	retl
-	st	%g3,[%o4+%o2]
+	 nop

 END(__mpn_submul_1)
--- a/sysdeps/sparc/sparc32/udiv.S
+++ b/sysdeps/sparc/sparc32/udiv.S
@ -1,347 +1,16 @@
-   /* This file is generated from divrem.m4; DO NOT EDIT! */
 /*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
+ * Sparc v8 has divide.
 */

-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- *  .udiv	name of function to generate
- *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
- *  false		false=true => signed; false=false => unsigned
- *
- * Algorithm parameters:
- *  N		how many bits per iteration we try to get (4)
- *  WORDSIZE	total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS	number of bits in the top decade of a number
- *
- * Important variables:
- *  Q		the partial quotient under development (initially 0)
- *  R		the remainder so far, initially the dividend
- *  ITER	number of main division loop iterations required;
- *		equal to ceil(log2(quotient) / N).  Note that this
- *		is the log base (2^N) of the quotient.
- *  V		the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-
-
-
 #include <sysdep.h>
-#include <sys/trap.h>

 ENTRY(.udiv)

-	! Ready to divide.  Compute size of quotient; scale comparand.
-	orcc	%o1, %g0, %o5
-	bne	1f
-	mov	%o0, %o3
-
-		! Divide by zero trap.  If it returns, return 0 (about as
-		! wrong as possible, but that is what SunOS does...).
-		ta	ST_DIV0
-		retl
-		clr	%o0
-
-1:
-	cmp	%o3, %o5			! if %o1 exceeds %o0, done
-	blu	LOC(got_result)		! (and algorithm fails otherwise)
-	clr	%o2
-	sethi	%hi(1 << (32 - 4 - 1)), %g1
-	cmp	%o3, %g1
-	blu	LOC(not_really_big)
-	clr	%o4
-
-	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-	! as our usual N-at-a-shot divide step will cause overflow and havoc.
-	! The number of bits in the result here is N*ITER+SC, where SC <= N.
-	! Compute ITER in an unorthodox manner: know we need to shift V into
-	! the top decade: so do not even bother to compare to R.
-	1:
-		cmp	%o5, %g1
-		bgeu	3f
-		mov	1, %g2
-		sll	%o5, 4, %o5
-		b	1b
-		add	%o4, 1, %o4
-
-	! Now compute %g2.
-	2:	addcc	%o5, %o5, %o5
-		bcc	LOC(not_too_big)
-		add	%g2, 1, %g2
-
-		! We get here if the %o1 overflowed while shifting.
-		! This means that %o3 has the high-order bit set.
-		! Restore %o5 and subtract from %o3.
-		sll	%g1, 4, %g1	! high order bit
-		srl	%o5, 1, %o5		! rest of %o5
-		add	%o5, %g1, %o5
-		b	LOC(do_single_div)
-		sub	%g2, 1, %g2
-
-	LOC(not_too_big):
-	3:	cmp	%o5, %o3
-		blu	2b
-		nop
-		be	LOC(do_single_div)
-		nop
-	/* NB: these are commented out in the V8-Sparc manual as well */
-	/* (I do not understand this) */
-	! %o5 > %o3: went too far: back up 1 step
-	!	srl	%o5, 1, %o5
-	!	dec	%g2
-	! do single-bit divide steps
-	!
-	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
-	! first divide step without thinking.  BUT, the others are conditional,
-	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
-	! order bit set in the first step, just falling into the regular
-	! division loop will mess up the first time around.
-	! So we unroll slightly...
-	LOC(do_single_div):
-		subcc	%g2, 1, %g2
-		bl	LOC(end_regular_divide)
-		nop
-		sub	%o3, %o5, %o3
-		mov	1, %o2
-		b	LOC(end_single_divloop)
-		nop
-	LOC(single_divloop):
-		sll	%o2, 1, %o2
-		bl	1f
-		srl	%o5, 1, %o5
-		! %o3 >= 0
-		sub	%o3, %o5, %o3
-		b	2f
-		add	%o2, 1, %o2
-	1:	! %o3 < 0
-		add	%o3, %o5, %o3
-		sub	%o2, 1, %o2
-	2:
-	LOC(end_single_divloop):
-		subcc	%g2, 1, %g2
-		bge	LOC(single_divloop)
-		tst	%o3
-		b,a	LOC(end_regular_divide)
-
-LOC(not_really_big):
-1:
-	sll	%o5, 4, %o5
-	cmp	%o5, %o3
-	bleu	1b
-	addcc	%o4, 1, %o4
-	be	LOC(got_result)
-	sub	%o4, 1, %o4
-
-	tst	%o3	! set up for initial iteration
-LOC(divloop):
-	sll	%o2, 4, %o2
-		! depth 1, accumulated bits 0
-	bl	LOC(1.16)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 2, accumulated bits 1
-	bl	LOC(2.17)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 3
-	bl	LOC(3.19)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 7
-	bl	LOC(4.23)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (7*2+1), %o2
-
-LOC(4.23):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (7*2-1), %o2
-
-
-LOC(3.19):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 5
-	bl	LOC(4.21)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (5*2+1), %o2
-
-LOC(4.21):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (5*2-1), %o2
-
-
-
-LOC(2.17):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 1
-	bl	LOC(3.17)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 3
-	bl	LOC(4.19)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (3*2+1), %o2
-
-LOC(4.19):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (3*2-1), %o2
-
-
-LOC(3.17):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 1
-	bl	LOC(4.17)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (1*2+1), %o2
-
-LOC(4.17):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (1*2-1), %o2
-
-
-
-
-LOC(1.16):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 2, accumulated bits -1
-	bl	LOC(2.15)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -1
-	bl	LOC(3.15)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -1
-	bl	LOC(4.15)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-1*2+1), %o2
-
-LOC(4.15):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-1*2-1), %o2
-
-
-LOC(3.15):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -3
-	bl	LOC(4.13)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-3*2+1), %o2
-
-LOC(4.13):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-3*2-1), %o2
-
-
-
-LOC(2.15):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -3
-	bl	LOC(3.13)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -5
-	bl	LOC(4.11)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-5*2+1), %o2
-
-LOC(4.11):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-5*2-1), %o2
-
-
-LOC(3.13):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -7
-	bl	LOC(4.9)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-7*2+1), %o2
-
-LOC(4.9):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-7*2-1), %o2
-
-
-
-
-9:
-LOC(end_regular_divide):
-	subcc	%o4, 1, %o4
-	bge	LOC(divloop)
-	tst	%o3
-	bl,a	LOC(got_result)
-	! non-restoring fixup here (one instruction only!)
-	sub	%o2, 1, %o2
-
-
-LOC(got_result):
-
+	wr	%g0, 0, %y
+	nop
+	nop
 	retl
-	mov %o2, %o0
+	 udiv	%o0, %o1, %o0

 END(.udiv)
 strong_alias (.udiv, __wrap_.udiv)
--- a/sysdeps/sparc/sparc32/umul.S
+++ b/sysdeps/sparc/sparc32/umul.S
@ -1,155 +1,13 @@
 /*
- * Unsigned multiply.  Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
- * upper 32 bits of the 64-bit product).
- *
- * This code optimizes short (less than 13-bit) multiplies.  Short
- * multiplies require 25 instruction cycles, and long ones require
- * 45 instruction cycles.
- *
- * On return, overflow has occurred (%o1 is not zero) if and only if
- * the Z condition code is clear, allowing, e.g., the following:
- *
- *	call	.umul
- *	nop
- *	bnz	overflow	(or tnz)
+ * Sparc v8 has multiply.
 */

 #include <sysdep.h>

 ENTRY(.umul)
-	or	%o0, %o1, %o4
-	mov	%o0, %y			! multiplier -> Y
-	andncc	%o4, 0xfff, %g0		! test bits 12..31 of *both* args
-	be	LOC(mul_shortway)	! if zero, can do it the short way
-	 andcc	%g0, %g0, %o4		! zero the partial product; clear N & V

-	/*
-	 * Long multiply.  32 steps, followed by a final shift step.
-	 */
-	mulscc	%o4, %o1, %o4	! 1
-	mulscc	%o4, %o1, %o4	! 2
-	mulscc	%o4, %o1, %o4	! 3
-	mulscc	%o4, %o1, %o4	! 4
-	mulscc	%o4, %o1, %o4	! 5
-	mulscc	%o4, %o1, %o4	! 6
-	mulscc	%o4, %o1, %o4	! 7
-	mulscc	%o4, %o1, %o4	! 8
-	mulscc	%o4, %o1, %o4	! 9
-	mulscc	%o4, %o1, %o4	! 10
-	mulscc	%o4, %o1, %o4	! 11
-	mulscc	%o4, %o1, %o4	! 12
-	mulscc	%o4, %o1, %o4	! 13
-	mulscc	%o4, %o1, %o4	! 14
-	mulscc	%o4, %o1, %o4	! 15
-	mulscc	%o4, %o1, %o4	! 16
-	mulscc	%o4, %o1, %o4	! 17
-	mulscc	%o4, %o1, %o4	! 18
-	mulscc	%o4, %o1, %o4	! 19
-	mulscc	%o4, %o1, %o4	! 20
-	mulscc	%o4, %o1, %o4	! 21
-	mulscc	%o4, %o1, %o4	! 22
-	mulscc	%o4, %o1, %o4	! 23
-	mulscc	%o4, %o1, %o4	! 24
-	mulscc	%o4, %o1, %o4	! 25
-	mulscc	%o4, %o1, %o4	! 26
-	mulscc	%o4, %o1, %o4	! 27
-	mulscc	%o4, %o1, %o4	! 28
-	mulscc	%o4, %o1, %o4	! 29
-	mulscc	%o4, %o1, %o4	! 30
-	mulscc	%o4, %o1, %o4	! 31
-	mulscc	%o4, %o1, %o4	! 32
-	mulscc	%o4, %g0, %o4	! final shift
-
-	/*
-	 * Normally, with the shift-and-add approach, if both numbers are
-	 * positive you get the correct result.  With 32-bit two's-complement
-	 * numbers, -x is represented as
-	 *
-	 *		  x		    32
-	 *	( 2  -  ------ ) mod 2  *  2
-	 *		   32
-	 *		  2
-	 *
-	 * (the `mod 2' subtracts 1 from 1.bbbb).  To avoid lots of 2^32s,
-	 * we can treat this as if the radix point were just to the left
-	 * of the sign bit (multiply by 2^32), and get
-	 *
-	 *	-x  =  (2 - x) mod 2
-	 *
-	 * Then, ignoring the `mod 2's for convenience:
-	 *
-	 *   x *  y	= xy
-	 *  -x *  y	= 2y - xy
-	 *   x * -y	= 2x - xy
-	 *  -x * -y	= 4 - 2x - 2y + xy
-	 *
-	 * For signed multiplies, we subtract (x << 32) from the partial
-	 * product to fix this problem for negative multipliers (see mul.s).
-	 * Because of the way the shift into the partial product is calculated
-	 * (N xor V), this term is automatically removed for the multiplicand,
-	 * so we don't have to adjust.
-	 *
-	 * But for unsigned multiplies, the high order bit wasn't a sign bit,
-	 * and the correction is wrong.  So for unsigned multiplies where the
-	 * high order bit is one, we end up with xy - (y << 32).  To fix it
-	 * we add y << 32.
-	 */
-#if 0
-	tst	%o1
-	bl,a	1f		! if %o1 < 0 (high order bit = 1),
-	 add	%o4, %o0, %o4	! %o4 += %o0 (add y to upper half)
-1:	rd	%y, %o0		! get lower half of product
+	umul	%o0, %o1, %o0
 	retl
-	 addcc	%o4, %g0, %o1	! put upper half in place and set Z for %o1==0
-#else
-	/* Faster code from tege@sics.se.  */
-	sra	%o1, 31, %o2	! make mask from sign bit
-	and	%o0, %o2, %o2	! %o2 = 0 or %o0, depending on sign of %o1
-	rd	%y, %o0		! get lower half of product
-	retl
-	 addcc	%o4, %o2, %o1	! add compensation and put upper half in place
-#endif
-
-LOC(mul_shortway):
-	/*
-	 * Short multiply.  12 steps, followed by a final shift step.
-	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
-	 * but there is no problem with %o0 being negative (unlike above),
-	 * and overflow is impossible (the answer is at most 24 bits long).
-	 */
-	mulscc	%o4, %o1, %o4	! 1
-	mulscc	%o4, %o1, %o4	! 2
-	mulscc	%o4, %o1, %o4	! 3
-	mulscc	%o4, %o1, %o4	! 4
-	mulscc	%o4, %o1, %o4	! 5
-	mulscc	%o4, %o1, %o4	! 6
-	mulscc	%o4, %o1, %o4	! 7
-	mulscc	%o4, %o1, %o4	! 8
-	mulscc	%o4, %o1, %o4	! 9
-	mulscc	%o4, %o1, %o4	! 10
-	mulscc	%o4, %o1, %o4	! 11
-	mulscc	%o4, %o1, %o4	! 12
-	mulscc	%o4, %g0, %o4	! final shift
-
-	/*
-	 * %o4 has 20 of the bits that should be in the result; %y has
-	 * the bottom 12 (as %y's top 12).  That is:
-	 *
-	 *	  %o4		    %y
-	 * +----------------+----------------+
-	 * | -12- |   -20-  | -12- |   -20-  |
-	 * +------(---------+------)---------+
-	 *	   -----result-----
-	 *
-	 * The 12 bits of %o4 left of the `result' area are all zero;
-	 * in fact, all top 20 bits of %o4 are zero.
-	 */
-
-	rd	%y, %o5
-	sll	%o4, 12, %o0	! shift middle bits left 12
-	srl	%o5, 20, %o5	! shift low bits right 20
-	or	%o5, %o0, %o0
-	retl
-	 addcc	%g0, %g0, %o1	! %o1 = zero, and set Z
+	 rd	%y, %o1

 END(.umul)
--- a/sysdeps/sparc/sparc32/urem.S
+++ b/sysdeps/sparc/sparc32/urem.S
@ -1,346 +1,18 @@
-   /* This file is generated from divrem.m4; DO NOT EDIT! */
 /*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
+ * Sparc v8 has divide.
 */

-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- *  .urem	name of function to generate
- *  rem		rem=div => %o0 / %o1; rem=rem => %o0 % %o1
- *  false		false=true => signed; false=false => unsigned
- *
- * Algorithm parameters:
- *  N		how many bits per iteration we try to get (4)
- *  WORDSIZE	total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS	number of bits in the top decade of a number
- *
- * Important variables:
- *  Q		the partial quotient under development (initially 0)
- *  R		the remainder so far, initially the dividend
- *  ITER	number of main division loop iterations required;
- *		equal to ceil(log2(quotient) / N).  Note that this
- *		is the log base (2^N) of the quotient.
- *  V		the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-
-
-
 #include <sysdep.h>
-#include <sys/trap.h>

 ENTRY(.urem)

-	! Ready to divide.  Compute size of quotient; scale comparand.
-	orcc	%o1, %g0, %o5
-	bne	1f
-	mov	%o0, %o3
-
-		! Divide by zero trap.  If it returns, return 0 (about as
-		! wrong as possible, but that is what SunOS does...).
-		ta	ST_DIV0
-		retl
-		clr	%o0
-
-1:
-	cmp	%o3, %o5			! if %o1 exceeds %o0, done
-	blu	LOC(got_result)		! (and algorithm fails otherwise)
-	clr	%o2
-	sethi	%hi(1 << (32 - 4 - 1)), %g1
-	cmp	%o3, %g1
-	blu	LOC(not_really_big)
-	clr	%o4
-
-	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-	! as our usual N-at-a-shot divide step will cause overflow and havoc.
-	! The number of bits in the result here is N*ITER+SC, where SC <= N.
-	! Compute ITER in an unorthodox manner: know we need to shift V into
-	! the top decade: so do not even bother to compare to R.
-	1:
-		cmp	%o5, %g1
-		bgeu	3f
-		mov	1, %g2
-		sll	%o5, 4, %o5
-		b	1b
-		add	%o4, 1, %o4
-
-	! Now compute %g2.
-	2:	addcc	%o5, %o5, %o5
-		bcc	LOC(not_too_big)
-		add	%g2, 1, %g2
-
-		! We get here if the %o1 overflowed while shifting.
-		! This means that %o3 has the high-order bit set.
-		! Restore %o5 and subtract from %o3.
-		sll	%g1, 4, %g1	! high order bit
-		srl	%o5, 1, %o5		! rest of %o5
-		add	%o5, %g1, %o5
-		b	LOC(do_single_div)
-		sub	%g2, 1, %g2
-
-	LOC(not_too_big):
-	3:	cmp	%o5, %o3
-		blu	2b
-		nop
-		be	LOC(do_single_div)
-		nop
-	/* NB: these are commented out in the V8-Sparc manual as well */
-	/* (I do not understand this) */
-	! %o5 > %o3: went too far: back up 1 step
-	!	srl	%o5, 1, %o5
-	!	dec	%g2
-	! do single-bit divide steps
-	!
-	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
-	! first divide step without thinking.  BUT, the others are conditional,
-	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
-	! order bit set in the first step, just falling into the regular
-	! division loop will mess up the first time around.
-	! So we unroll slightly...
-	LOC(do_single_div):
-		subcc	%g2, 1, %g2
-		bl	LOC(end_regular_divide)
-		nop
-		sub	%o3, %o5, %o3
-		mov	1, %o2
-		b	LOC(end_single_divloop)
-		nop
-	LOC(single_divloop):
-		sll	%o2, 1, %o2
-		bl	1f
-		srl	%o5, 1, %o5
-		! %o3 >= 0
-		sub	%o3, %o5, %o3
-		b	2f
-		add	%o2, 1, %o2
-	1:	! %o3 < 0
-		add	%o3, %o5, %o3
-		sub	%o2, 1, %o2
-	2:
-	LOC(end_single_divloop):
-		subcc	%g2, 1, %g2
-		bge	LOC(single_divloop)
-		tst	%o3
-		b,a	LOC(end_regular_divide)
-
-LOC(not_really_big):
-1:
-	sll	%o5, 4, %o5
-	cmp	%o5, %o3
-	bleu	1b
-	addcc	%o4, 1, %o4
-	be	LOC(got_result)
-	sub	%o4, 1, %o4
-
-	tst	%o3	! set up for initial iteration
-LOC(divloop):
-	sll	%o2, 4, %o2
-		! depth 1, accumulated bits 0
-	bl	LOC(1.16)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 2, accumulated bits 1
-	bl	LOC(2.17)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 3
-	bl	LOC(3.19)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 7
-	bl	LOC(4.23)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (7*2+1), %o2
-
-LOC(4.23):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (7*2-1), %o2
-
-
-LOC(3.19):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 5
-	bl	LOC(4.21)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (5*2+1), %o2
-
-LOC(4.21):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (5*2-1), %o2
-
-
-
-LOC(2.17):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 1
-	bl	LOC(3.17)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 3
-	bl	LOC(4.19)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (3*2+1), %o2
-
-LOC(4.19):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (3*2-1), %o2
-
-
-LOC(3.17):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 1
-	bl	LOC(4.17)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (1*2+1), %o2
-
-LOC(4.17):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (1*2-1), %o2
-
-
-
-
-LOC(1.16):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 2, accumulated bits -1
-	bl	LOC(2.15)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -1
-	bl	LOC(3.15)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -1
-	bl	LOC(4.15)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-1*2+1), %o2
-
-LOC(4.15):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-1*2-1), %o2
-
-
-LOC(3.15):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -3
-	bl	LOC(4.13)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-3*2+1), %o2
-
-LOC(4.13):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-3*2-1), %o2
-
-
-
-LOC(2.15):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -3
-	bl	LOC(3.13)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -5
-	bl	LOC(4.11)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-5*2+1), %o2
-
-LOC(4.11):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-5*2-1), %o2
-
-
-LOC(3.13):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -7
-	bl	LOC(4.9)
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-7*2+1), %o2
-
-LOC(4.9):
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-		b	9f
-		add	%o2, (-7*2-1), %o2
-
-
-
-
-9:
-LOC(end_regular_divide):
-	subcc	%o4, 1, %o4
-	bge	LOC(divloop)
-	tst	%o3
-	bl,a	LOC(got_result)
-	! non-restoring fixup here (one instruction only!)
-	add	%o3, %o1, %o3
-
-
-LOC(got_result):
-
+	wr	%g0, 0, %y
+	nop
+	nop
+	nop
+	udiv	%o0, %o1, %o2
+	umul	%o2, %o1, %o2
 	retl
-	mov %o3, %o0
+	 sub	%o0, %o2, %o0

 END(.urem)