glibc/sysdeps/sparc/sparc32/udiv.S
Adhemerval Zanella bdc543e338 sparc: Fix .udiv plt on libc
With the removal of divdi3 object from sparcv9-linux-gnu build, its
definition came from libgcc and its functions internall calls .udiv.
Since glibc also exports these symbols for compatibility reasons, it
will end up creating PLT calls internally in libc.so.

To avoid it, this patch uses the linker option --wrap to replace all
the internal libc.so .udiv calls to the wrapper __wrap_.udiv. Along
with strong alias in the udiv implementations, it makes linker do
local calls.

Checked on sparcv9-linux-gnu.

	* sysdeps/sparc/sparc32/Makefile (libc.so-gnulib): New rule.
	* sysdeps/sparc/sparc32/sparcv8/udiv.S (.udiv): Make a strong_alias
	to __wrap_.udiv.
	* sysdeps/sparc/sparc32/sparcv9/udiv.S (.udiv): Likewise.
	* sysdeps/sparc/sparc32/udiv.S (.udiv): Likewise.
2017-04-06 15:14:44 -03:00

348 lines
6.9 KiB
ArmAsm

/* This file is generated from divrem.m4; DO NOT EDIT! */
/*
* Division and remainder, from Appendix E of the Sparc Version 8
* Architecture Manual, with fixes from Gordon Irlam.
*/
/*
* Input: dividend and divisor in %o0 and %o1 respectively.
*
* m4 parameters:
* .udiv name of function to generate
* div div=div => %o0 / %o1; div=rem => %o0 % %o1
* false false=true => signed; false=false => unsigned
*
* Algorithm parameters:
* N how many bits per iteration we try to get (4)
* WORDSIZE total number of bits (32)
*
* Derived constants:
* TOPBITS number of bits in the top decade of a number
*
* Important variables:
* Q the partial quotient under development (initially 0)
* R the remainder so far, initially the dividend
* ITER number of main division loop iterations required;
* equal to ceil(log2(quotient) / N). Note that this
* is the log base (2^N) of the quotient.
* V the current comparand, initially divisor*2^(ITER*N-1)
*
* Cost:
* Current estimate for non-large dividend is
* ceil(log2(quotient) / N) * (10 + 7N/2) + C
* A large dividend is one greater than 2^(31-TOPBITS) and takes a
* different path, as the upper bits of the quotient must be developed
* one bit at a time.
*/
#include <sysdep.h>
#include <sys/trap.h>
ENTRY(.udiv)
! Ready to divide. Compute size of quotient; scale comparand.
orcc %o1, %g0, %o5
bne 1f
mov %o0, %o3
! Divide by zero trap. If it returns, return 0 (about as
! wrong as possible, but that is what SunOS does...).
ta ST_DIV0
retl
clr %o0
1:
cmp %o3, %o5 ! if %o1 exceeds %o0, done
blu LOC(got_result) ! (and algorithm fails otherwise)
clr %o2
sethi %hi(1 << (32 - 4 - 1)), %g1
cmp %o3, %g1
blu LOC(not_really_big)
clr %o4
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
! as our usual N-at-a-shot divide step will cause overflow and havoc.
! The number of bits in the result here is N*ITER+SC, where SC <= N.
! Compute ITER in an unorthodox manner: know we need to shift V into
! the top decade: so do not even bother to compare to R.
1:
cmp %o5, %g1
bgeu 3f
mov 1, %g2
sll %o5, 4, %o5
b 1b
add %o4, 1, %o4
! Now compute %g2.
2: addcc %o5, %o5, %o5
bcc LOC(not_too_big)
add %g2, 1, %g2
! We get here if the %o1 overflowed while shifting.
! This means that %o3 has the high-order bit set.
! Restore %o5 and subtract from %o3.
sll %g1, 4, %g1 ! high order bit
srl %o5, 1, %o5 ! rest of %o5
add %o5, %g1, %o5
b LOC(do_single_div)
sub %g2, 1, %g2
LOC(not_too_big):
3: cmp %o5, %o3
blu 2b
nop
be LOC(do_single_div)
nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
! %o5 > %o3: went too far: back up 1 step
! srl %o5, 1, %o5
! dec %g2
! do single-bit divide steps
!
! We have to be careful here. We know that %o3 >= %o5, so we can do the
! first divide step without thinking. BUT, the others are conditional,
! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
LOC(do_single_div):
subcc %g2, 1, %g2
bl LOC(end_regular_divide)
nop
sub %o3, %o5, %o3
mov 1, %o2
b LOC(end_single_divloop)
nop
LOC(single_divloop):
sll %o2, 1, %o2
bl 1f
srl %o5, 1, %o5
! %o3 >= 0
sub %o3, %o5, %o3
b 2f
add %o2, 1, %o2
1: ! %o3 < 0
add %o3, %o5, %o3
sub %o2, 1, %o2
2:
LOC(end_single_divloop):
subcc %g2, 1, %g2
bge LOC(single_divloop)
tst %o3
b,a LOC(end_regular_divide)
LOC(not_really_big):
1:
sll %o5, 4, %o5
cmp %o5, %o3
bleu 1b
addcc %o4, 1, %o4
be LOC(got_result)
sub %o4, 1, %o4
tst %o3 ! set up for initial iteration
LOC(divloop):
sll %o2, 4, %o2
! depth 1, accumulated bits 0
bl LOC(1.16)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 2, accumulated bits 1
bl LOC(2.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits 3
bl LOC(3.19)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 7
bl LOC(4.23)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
LOC(4.23):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
LOC(3.19):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 5
bl LOC(4.21)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
LOC(4.21):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
LOC(2.17):
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits 1
bl LOC(3.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 3
bl LOC(4.19)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
LOC(4.19):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
LOC(3.17):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 1
bl LOC(4.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
LOC(4.17):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
LOC(1.16):
! remainder is negative
addcc %o3,%o5,%o3
! depth 2, accumulated bits -1
bl LOC(2.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits -1
bl LOC(3.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -1
bl LOC(4.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
LOC(4.15):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
LOC(3.15):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -3
bl LOC(4.13)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
LOC(4.13):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
LOC(2.15):
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits -3
bl LOC(3.13)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -5
bl LOC(4.11)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
LOC(4.11):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
LOC(3.13):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -7
bl LOC(4.9)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
LOC(4.9):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
9:
LOC(end_regular_divide):
subcc %o4, 1, %o4
bge LOC(divloop)
tst %o3
bl,a LOC(got_result)
! non-restoring fixup here (one instruction only!)
sub %o2, 1, %o2
LOC(got_result):
retl
mov %o2, %o0
END(.udiv)
strong_alias (.udiv, __wrap_.udiv)