glibc/sysdeps/sparc/sparc32/urem.S
Joseph Myers 174edbde7e Update SPARC divrem generation to match output.
While working on another patch I noticed that (a)
sysdeps/sparc/sparc32/Makefile is the only place with special
realclean settings, apart from po/, and (b) the generated files with a
rule in that Makefile to generate them (using m4) had been patched
manually so no longer corresponded with the output of the generator -
so if the timestamps were wrong, a build would result in changes to
the files in the source directory.  (They also didn't correspond
because of changes in make 3.81 to how make handles whitespace at the
start of a line in a sequence of backslash-newline continuation lines
within a recipe.)

This patch fixes the generation and output files to match.  The issue
with make and whitespace at start of continuation lines is fixed by
putting those newlines outside of arguments to echo, so the number of
spaces in the argument matches the number in the existing generated
files.  Then divrem.m4 is changed to avoid generating whitespace-only
lines (my fix to the outputs from 2013; this fix to the generator also
changes the indentation of a label in the output files) and to
generate an alias in udiv.S (Adhemerval's fix from March).

build-many-glibcs.py doesn't have a non-v9 SPARC configuration,
because non-v9 32-bit SPARC didn't build when I set up
build-many-glibcs.py but sparcv9 did build.  Whether or not non-v9
32-bit SPARC now builds (or indeed whether or not support for it is
obsolete), I tested by removing the sparcv8 and sparcv9 versions of
the four files in question, so forcing the generated files to be built
and used, and the compilation parts of the glibc testsuite passed.

	* sysdeps/sparc/sparc32/Makefile
	($(divrem:%=$(sysdep_dir)/sparc/sparc32/%.S)): Do not include
	start-of-line whitespace in argument of echo.
	* sysdeps/sparc/sparc32/divrem.m4: Avoid generating lines starting
	with whitespace.  Generate __wrap_.udiv alias.
	* sysdeps/sparc/sparc32/rem.S: Regenerated.
	* sysdeps/sparc/sparc32/sdiv.S: Likewise.
	* sysdeps/sparc/sparc32/udiv.S: Likewise.
	* sysdeps/sparc/sparc32/urem.S: Likewise.
2017-12-15 14:06:07 +00:00

347 lines
6.8 KiB
ArmAsm

/* This file is generated from divrem.m4; DO NOT EDIT! */
/*
* Division and remainder, from Appendix E of the Sparc Version 8
* Architecture Manual, with fixes from Gordon Irlam.
*/
/*
* Input: dividend and divisor in %o0 and %o1 respectively.
*
* m4 parameters:
* .urem name of function to generate
* rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
* false false=true => signed; false=false => unsigned
*
* Algorithm parameters:
* N how many bits per iteration we try to get (4)
* WORDSIZE total number of bits (32)
*
* Derived constants:
* TOPBITS number of bits in the top decade of a number
*
* Important variables:
* Q the partial quotient under development (initially 0)
* R the remainder so far, initially the dividend
* ITER number of main division loop iterations required;
* equal to ceil(log2(quotient) / N). Note that this
* is the log base (2^N) of the quotient.
* V the current comparand, initially divisor*2^(ITER*N-1)
*
* Cost:
* Current estimate for non-large dividend is
* ceil(log2(quotient) / N) * (10 + 7N/2) + C
* A large dividend is one greater than 2^(31-TOPBITS) and takes a
* different path, as the upper bits of the quotient must be developed
* one bit at a time.
*/
#include <sysdep.h>
#include <sys/trap.h>
ENTRY(.urem)
! Ready to divide. Compute size of quotient; scale comparand.
orcc %o1, %g0, %o5
bne 1f
mov %o0, %o3
! Divide by zero trap. If it returns, return 0 (about as
! wrong as possible, but that is what SunOS does...).
ta ST_DIV0
retl
clr %o0
1:
cmp %o3, %o5 ! if %o1 exceeds %o0, done
blu LOC(got_result) ! (and algorithm fails otherwise)
clr %o2
sethi %hi(1 << (32 - 4 - 1)), %g1
cmp %o3, %g1
blu LOC(not_really_big)
clr %o4
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
! as our usual N-at-a-shot divide step will cause overflow and havoc.
! The number of bits in the result here is N*ITER+SC, where SC <= N.
! Compute ITER in an unorthodox manner: know we need to shift V into
! the top decade: so do not even bother to compare to R.
1:
cmp %o5, %g1
bgeu 3f
mov 1, %g2
sll %o5, 4, %o5
b 1b
add %o4, 1, %o4
! Now compute %g2.
2: addcc %o5, %o5, %o5
bcc LOC(not_too_big)
add %g2, 1, %g2
! We get here if the %o1 overflowed while shifting.
! This means that %o3 has the high-order bit set.
! Restore %o5 and subtract from %o3.
sll %g1, 4, %g1 ! high order bit
srl %o5, 1, %o5 ! rest of %o5
add %o5, %g1, %o5
b LOC(do_single_div)
sub %g2, 1, %g2
LOC(not_too_big):
3: cmp %o5, %o3
blu 2b
nop
be LOC(do_single_div)
nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
! %o5 > %o3: went too far: back up 1 step
! srl %o5, 1, %o5
! dec %g2
! do single-bit divide steps
!
! We have to be careful here. We know that %o3 >= %o5, so we can do the
! first divide step without thinking. BUT, the others are conditional,
! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
LOC(do_single_div):
subcc %g2, 1, %g2
bl LOC(end_regular_divide)
nop
sub %o3, %o5, %o3
mov 1, %o2
b LOC(end_single_divloop)
nop
LOC(single_divloop):
sll %o2, 1, %o2
bl 1f
srl %o5, 1, %o5
! %o3 >= 0
sub %o3, %o5, %o3
b 2f
add %o2, 1, %o2
1: ! %o3 < 0
add %o3, %o5, %o3
sub %o2, 1, %o2
2:
LOC(end_single_divloop):
subcc %g2, 1, %g2
bge LOC(single_divloop)
tst %o3
b,a LOC(end_regular_divide)
LOC(not_really_big):
1:
sll %o5, 4, %o5
cmp %o5, %o3
bleu 1b
addcc %o4, 1, %o4
be LOC(got_result)
sub %o4, 1, %o4
tst %o3 ! set up for initial iteration
LOC(divloop):
sll %o2, 4, %o2
! depth 1, accumulated bits 0
bl LOC(1.16)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 2, accumulated bits 1
bl LOC(2.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits 3
bl LOC(3.19)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 7
bl LOC(4.23)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
LOC(4.23):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
LOC(3.19):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 5
bl LOC(4.21)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
LOC(4.21):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
LOC(2.17):
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits 1
bl LOC(3.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 3
bl LOC(4.19)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
LOC(4.19):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
LOC(3.17):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 1
bl LOC(4.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
LOC(4.17):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
LOC(1.16):
! remainder is negative
addcc %o3,%o5,%o3
! depth 2, accumulated bits -1
bl LOC(2.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits -1
bl LOC(3.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -1
bl LOC(4.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
LOC(4.15):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
LOC(3.15):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -3
bl LOC(4.13)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
LOC(4.13):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
LOC(2.15):
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits -3
bl LOC(3.13)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -5
bl LOC(4.11)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
LOC(4.11):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
LOC(3.13):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -7
bl LOC(4.9)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
LOC(4.9):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
9:
LOC(end_regular_divide):
subcc %o4, 1, %o4
bge LOC(divloop)
tst %o3
bl,a LOC(got_result)
! non-restoring fixup here (one instruction only!)
add %o3, %o1, %o3
LOC(got_result):
retl
mov %o3, %o0
END(.urem)