mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-26 06:50:07 +00:00
Updated from ../=mpn/gmp-1.906.7
This commit is contained in:
parent
3a29975f0a
commit
7def3d92a4
119
sysdeps/alpha/add_n.s
Normal file
119
sysdeps/alpha/add_n.s
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
# Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
|
||||||
|
# store sum in a third limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $16
|
||||||
|
# s1_ptr $17
|
||||||
|
# s2_ptr $18
|
||||||
|
# size $19
|
||||||
|
|
||||||
|
# __mpn_add_n: res[i] = s1[i] + s2[i] with carry propagation, size > 0.
# Register use in this routine:
#   $16 = res_ptr, $17 = s1_ptr, $18 = s2_ptr, $19 = limb counter
#   $3,$5 = s1 limbs   $4,$6 = s2 limbs (each loaded one limb ahead of use)
#   $0 = carry between limbs, left holding the final carry at ret
#   $1 = carry out of the "s2 limb + carry-in" partial add
        .set    noreorder
        .set    noat
        .text
        .align  3
        .globl  __mpn_add_n
        .ent    __mpn_add_n
__mpn_add_n:
        .frame  $30,0,$26,0

        ldq     $3,0($17)               # first s1 limb
        ldq     $4,0($18)               # first s2 limb

        subq    $19,1,$19               # last limb is handled at .Lend
        and     $19,3,$2                # number of limbs in first loop
        bis     $31,$31,$0              # carry = 0
        beq     $2,.L0                  # if multiple of 4 limbs, skip first loop

        subq    $19,$2,$19              # limbs left for the 4-way loop

# First loop: one limb per iteration, $2 iterations.
.Loop0:
        subq    $2,1,$2
        ldq     $5,8($17)               # next s1 limb
        addq    $4,$0,$4                # s2 limb + carry-in
        ldq     $6,8($18)               # next s2 limb
        cmpult  $4,$0,$1                # carry out of that add
        addq    $3,$4,$4                # + s1 limb
        cmpult  $4,$3,$0                # carry out of that add
        stq     $4,0($16)
        bis     $0,$1,$0                # merge the two carries

        addq    $17,8,$17
        addq    $18,8,$18
        bis     $5,$5,$3                # next limbs become current
        bis     $6,$6,$4
        addq    $16,8,$16
        bne     $2,.Loop0

.L0:
        beq     $19,.Lend

# Main loop: four limbs per iteration, alternating the register pairs
# ($3,$4) and ($5,$6) between "current" and "next".
        .align  3
.Loop:
        subq    $19,4,$19

        ldq     $5,8($17)
        addq    $4,$0,$4
        ldq     $6,8($18)
        cmpult  $4,$0,$1
        addq    $3,$4,$4
        cmpult  $4,$3,$0
        stq     $4,0($16)
        bis     $0,$1,$0

        ldq     $3,16($17)
        addq    $6,$0,$6
        ldq     $4,16($18)
        cmpult  $6,$0,$1
        addq    $5,$6,$6
        cmpult  $6,$5,$0
        stq     $6,8($16)
        bis     $0,$1,$0

        ldq     $5,24($17)
        addq    $4,$0,$4
        ldq     $6,24($18)
        cmpult  $4,$0,$1
        addq    $3,$4,$4
        cmpult  $4,$3,$0
        stq     $4,16($16)
        bis     $0,$1,$0

        ldq     $3,32($17)
        addq    $6,$0,$6
        ldq     $4,32($18)
        cmpult  $6,$0,$1
        addq    $5,$6,$6
        cmpult  $6,$5,$0
        stq     $6,24($16)
        bis     $0,$1,$0

        addq    $17,32,$17
        addq    $18,32,$18
        addq    $16,32,$16
        bne     $19,.Loop

# Final limb: already loaded into $3/$4; no further loads.
.Lend:
        addq    $4,$0,$4
        cmpult  $4,$0,$1
        addq    $3,$4,$4
        cmpult  $4,$3,$0
        stq     $4,0($16)
        bis     $0,$1,$0                # $0 = carry out of the whole addition
        ret     $31,($26),1

        .end    __mpn_add_n
|
100
sysdeps/alpha/addmul_1.s
Normal file
100
sysdeps/alpha/addmul_1.s
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
# Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
||||||
|
# the result to a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r16
|
||||||
|
# s1_ptr r17
|
||||||
|
# size r18
|
||||||
|
# s2_limb r19
|
||||||
|
|
||||||
|
# This code runs at 42 cycles/limb on the 21064.
|
||||||
|
|
||||||
|
# To improve performance for long multiplications, we would use
|
||||||
|
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
|
||||||
|
# these instructions without slowing down the general code: 1. We can
|
||||||
|
# only have two prefetches in operation at any time in the Alpha
|
||||||
|
# architecture. 2. There will seldom be any special alignment
|
||||||
|
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
|
||||||
|
# loop into an inner and outer loop, having the inner loop handle
|
||||||
|
# exactly one prefetch block?
|
||||||
|
|
||||||
|
# __mpn_addmul_1: res[i] += s1[i] * s2_limb, carrying the high product word
# into the next limb.  Register use in this routine:
#   $16 = res_ptr, $17 = s1_ptr, $18 = size, $19 = s2_limb
#   $2 = current s1 limb, $3 = low product / sum, $5 = *res_ptr, then carry
#   $0,$4 = high product words and carries; $0 holds the value left at ret
        .set    noreorder
        .set    noat
        .text
        .align  3
        .globl  __mpn_addmul_1
        .ent    __mpn_addmul_1 2
__mpn_addmul_1:
        .frame  $30,0,$26

        ldq     $2,0($17)               # $2 = s1_limb
        addq    $17,8,$17               # s1_ptr++
        subq    $18,1,$18               # size--
        mulq    $2,$19,$3               # $3 = prod_low
        ldq     $5,0($16)               # $5 = *res_ptr
        umulh   $2,$19,$0               # $0 = prod_high
        beq     $18,Lend1               # jump if size was == 1
        ldq     $2,0($17)               # $2 = s1_limb
        addq    $17,8,$17               # s1_ptr++
        subq    $18,1,$18               # size--
        addq    $5,$3,$3                # *res_ptr + prod_low
        cmpult  $3,$5,$4                # $4 = carry from that add
        stq     $3,0($16)
        addq    $16,8,$16               # res_ptr++
        beq     $18,Lend2               # jump if size was == 2

        .align  3
Loop:
        mulq    $2,$19,$3               # $3 = prod_low
        ldq     $5,0($16)               # $5 = *res_ptr
        addq    $4,$0,$0                # cy_limb = cy_limb + 'cy'
        subq    $18,1,$18               # size--
        umulh   $2,$19,$4               # $4 = cy_limb
        ldq     $2,0($17)               # $2 = s1_limb
        addq    $17,8,$17               # s1_ptr++
        addq    $3,$0,$3                # $3 = cy_limb + prod_low
        cmpult  $3,$0,$0                # $0 = carry from (cy_limb + prod_low)
        addq    $5,$3,$3                # add in *res_ptr
        cmpult  $3,$5,$5                # $5 = carry from that add
        stq     $3,0($16)
        addq    $16,8,$16               # res_ptr++
        addq    $5,$0,$0                # combine carries
        bne     $18,Loop

# Last limb when size >= 2: same step as the loop body, without the
# look-ahead load and pointer updates.
Lend2:
        mulq    $2,$19,$3               # $3 = prod_low
        ldq     $5,0($16)               # $5 = *res_ptr
        addq    $4,$0,$0                # cy_limb = cy_limb + 'cy'
        umulh   $2,$19,$4               # $4 = cy_limb
        addq    $3,$0,$3                # $3 = cy_limb + prod_low
        cmpult  $3,$0,$0                # $0 = carry from (cy_limb + prod_low)
        addq    $5,$3,$3
        cmpult  $3,$5,$5
        stq     $3,0($16)
        addq    $5,$0,$0                # combine carries
        addq    $4,$0,$0                # cy_limb = prod_high + cy
        ret     $31,($26),1

# size == 1: single limb, $0 = prod_high + carry from the add.
Lend1:
        addq    $5,$3,$3
        cmpult  $3,$5,$5
        stq     $3,0($16)
        addq    $0,$5,$0
        ret     $31,($26),1

        .end    __mpn_addmul_1
|
118
sysdeps/alpha/alphaev5/add_n.s
Normal file
118
sysdeps/alpha/alphaev5/add_n.s
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
# Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
|
||||||
|
# store sum in a third limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $16
|
||||||
|
# s1_ptr $17
|
||||||
|
# s2_ptr $18
|
||||||
|
# size $19
|
||||||
|
|
||||||
|
# __mpn_add_n (alphaev5 variant): res[i] = s1[i] + s2[i] with carry, size > 0.
# Register use in this routine:
#   $16 = res_ptr, $17 = s1_ptr, $18 = s2_ptr, $19 = limb counter
#   $3,$5 = s1 limbs   $4,$6 = s2 limbs (loaded ahead of use)
#   $0 = carry / partial sum, $1 = carry of the partial add
#   $20-$23 = the four sums of one main-loop iteration, stored together
        .set    noreorder
        .set    noat
        .text
        .align  3
        .globl  __mpn_add_n
        .ent    __mpn_add_n
__mpn_add_n:
        .frame  $30,0,$26,0

        ldq     $3,0($17)               # first s1 limb
        ldq     $4,0($18)               # first s2 limb

        subq    $19,1,$19               # last limb is handled at .Lend
        and     $19,3,$2                # number of limbs in first loop
        bis     $31,$31,$0              # carry = 0
        beq     $2,.L0                  # if multiple of 4 limbs, skip first loop

        subq    $19,$2,$19              # limbs left for the 4-way loop

# First loop: one limb per iteration, $2 iterations.
.Loop0:
        subq    $2,1,$2
        ldq     $5,8($17)               # next s1 limb
        addq    $4,$0,$4                # s2 limb + carry-in
        ldq     $6,8($18)               # next s2 limb
        cmpult  $4,$0,$1                # carry out of that add
        addq    $3,$4,$4                # + s1 limb
        cmpult  $4,$3,$0                # carry out of that add
        stq     $4,0($16)
        bis     $0,$1,$0                # merge the two carries

        addq    $17,8,$17
        addq    $18,8,$18
        bis     $5,$5,$3                # next limbs become current
        bis     $6,$6,$4
        addq    $16,8,$16
        bne     $2,.Loop0

.L0:
        beq     $19,.Lend

# Main loop: four limbs per iteration.  Each limb does
#   $0 = s2 + carry; $1 = carry; sum = s1 + $0; $0 = carry; $0 |= $1
# with the loads for later limbs interleaved; the four sums are
# stored only after all of them have been computed.
        .align  4
.Loop:
        subq    $19,4,$19
        unop

        ldq     $6,8($18)
        addq    $4,$0,$0
        ldq     $5,8($17)
        cmpult  $0,$4,$1
        ldq     $4,16($18)
        addq    $3,$0,$20               # sum for limb 0
        cmpult  $20,$3,$0
        ldq     $3,16($17)
        bis     $0,$1,$0
        addq    $6,$0,$0
        cmpult  $0,$6,$1
        ldq     $6,24($18)
        addq    $5,$0,$21               # sum for limb 1
        cmpult  $21,$5,$0
        ldq     $5,24($17)
        bis     $0,$1,$0
        addq    $4,$0,$0
        cmpult  $0,$4,$1
        ldq     $4,32($18)
        addq    $3,$0,$22               # sum for limb 2
        cmpult  $22,$3,$0
        ldq     $3,32($17)
        bis     $0,$1,$0
        addq    $6,$0,$0
        cmpult  $0,$6,$1
        addq    $5,$0,$23               # sum for limb 3
        cmpult  $23,$5,$0
        bis     $0,$1,$0

        stq     $20,0($16)
        stq     $21,8($16)
        stq     $22,16($16)
        stq     $23,24($16)

        addq    $17,32,$17
        addq    $18,32,$18
        addq    $16,32,$16
        bne     $19,.Loop

# Final limb: already loaded into $3/$4; no further loads.
.Lend:
        addq    $4,$0,$4
        cmpult  $4,$0,$1
        addq    $3,$4,$4
        cmpult  $4,$3,$0
        stq     $4,0($16)
        bis     $0,$1,$0                # $0 = carry out of the whole addition
        ret     $31,($26),1

        .end    __mpn_add_n
|
175
sysdeps/alpha/alphaev5/lshift.s
Normal file
175
sysdeps/alpha/alphaev5/lshift.s
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
# Alpha EV5 __mpn_lshift --
|
||||||
|
|
||||||
|
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r16
|
||||||
|
# s1_ptr r17
|
||||||
|
# size r18
|
||||||
|
# cnt r19
|
||||||
|
|
||||||
|
# This code runs at 4.25 cycles/limb on the EV5.
|
||||||
|
|
||||||
|
# __mpn_lshift (alphaev5 variant): shift a limb vector left by cnt bits,
# working from the most significant limb downwards.
# Register use in this routine:
#   $16 = res_ptr (moved to the end of RES), $17 = s1_ptr (moved to the
#   end of s1), $18 = limb counter, $19 = cnt
#   $20 = 0 - cnt, used as the right-shift count for the bits that cross
#         into the next higher limb (the shifter uses only the low 6 bits
#         of the count, so this acts as 64 - cnt)
#   $28 = number of limbs handled one at a time in Loop0
#   $0  = function result: the bits shifted out of the top limb
#   $1-$4 = loaded source limbs; $5-$8 = right-shifted parts / results;
#   $21-$24 = left-shifted parts carried to the next lower result limb
#
# Fixes relative to the previous text of this routine:
#   - the phase-2 cool-down label was a second definition of Lcool1; it
#     is now Lcool2 and the branch after warm up phase 2 targets it
#     (same structure as the EV5 __mpn_rshift routine);
#   - restored the missing ')' in "ldq $3,-96($17)";
#   - restored the missing ':' after the Lend label.
        .set    noreorder
        .set    noat
        .text
        .align  3
        .globl  __mpn_lshift
        .ent    __mpn_lshift
__mpn_lshift:
        .frame  $30,0,$26,0

        s8addq  $18,$17,$17             # make r17 point at end of s1
        ldq     $4,-8($17)              # load first limb
        subq    $31,$19,$20             # $20 = -cnt
        s8addq  $18,$16,$16             # make r16 point at end of RES
        subq    $18,1,$18
        and     $18,4-1,$28             # number of limbs in first loop
        srl     $4,$20,$0               # compute function result

        beq     $28,L0
        subq    $18,$28,$18

# First loop: one result limb per iteration, $28 iterations.
        .align  3
Loop0:  ldq     $3,-16($17)             # next lower source limb
        subq    $16,8,$16
        sll     $4,$19,$5               # high part from current limb
        subq    $17,8,$17
        subq    $28,1,$28
        srl     $3,$20,$6               # low part from next limb
        or      $3,$3,$4                # next limb becomes current
        or      $5,$6,$8
        stq     $8,0($16)
        bne     $28,Loop0

L0:     sll     $4,$19,$24              # pending high part of current limb
        beq     $18,Lend
# warm up phase 1
        ldq     $1,-16($17)
        subq    $18,4,$18
        ldq     $2,-24($17)
        ldq     $3,-32($17)
        ldq     $4,-40($17)
        beq     $18,Lcool1              # only 4 limbs left: short cool-down
# warm up phase 2
        srl     $1,$20,$7
        sll     $1,$19,$21
        srl     $2,$20,$8
        ldq     $1,-48($17)
        sll     $2,$19,$22
        ldq     $2,-56($17)
        srl     $3,$20,$5
        or      $7,$24,$7
        sll     $3,$19,$23
        or      $8,$21,$8
        srl     $4,$20,$6
        ldq     $3,-64($17)
        sll     $4,$19,$24
        ldq     $4,-72($17)
        subq    $18,4,$18
        beq     $18,Lcool2              # 8 limbs in flight: long cool-down
        .align  4
# main loop
Loop:   stq     $7,-8($16)
        or      $5,$22,$5
        stq     $8,-16($16)
        or      $6,$23,$6

        srl     $1,$20,$7
        subq    $18,4,$18
        sll     $1,$19,$21
        unop    # ldq   $31,-96($17)

        srl     $2,$20,$8
        ldq     $1,-80($17)
        sll     $2,$19,$22
        ldq     $2,-88($17)

        stq     $5,-24($16)
        or      $7,$24,$7
        stq     $6,-32($16)
        or      $8,$21,$8

        srl     $3,$20,$5
        unop    # ldq   $31,-96($17)
        sll     $3,$19,$23
        subq    $16,32,$16

        srl     $4,$20,$6
        ldq     $3,-96($17)
        sll     $4,$19,$24
        ldq     $4,-104($17)

        subq    $17,32,$17
        bne     $18,Loop
        unop
        unop
# cool down phase 2/1
Lcool2: stq     $7,-8($16)
        or      $5,$22,$5
        stq     $8,-16($16)
        or      $6,$23,$6
        srl     $1,$20,$7
        sll     $1,$19,$21
        srl     $2,$20,$8
        sll     $2,$19,$22
        stq     $5,-24($16)
        or      $7,$24,$7
        stq     $6,-32($16)
        or      $8,$21,$8
        srl     $3,$20,$5
        sll     $3,$19,$23
        srl     $4,$20,$6
        sll     $4,$19,$24
# cool down phase 2/2
        stq     $7,-40($16)
        or      $5,$22,$5
        stq     $8,-48($16)
        or      $6,$23,$6
        stq     $5,-56($16)
        stq     $6,-64($16)
# cool down phase 2/3
        stq     $24,-72($16)            # lowest result limb
        ret     $31,($26),1

# cool down phase 1/1
Lcool1: srl     $1,$20,$7
        sll     $1,$19,$21
        srl     $2,$20,$8
        sll     $2,$19,$22
        srl     $3,$20,$5
        or      $7,$24,$7
        sll     $3,$19,$23
        or      $8,$21,$8
        srl     $4,$20,$6
        sll     $4,$19,$24
# cool down phase 1/2
        stq     $7,-8($16)
        or      $5,$22,$5
        stq     $8,-16($16)
        or      $6,$23,$6
        stq     $5,-24($16)
        stq     $6,-32($16)
        stq     $24,-40($16)            # lowest result limb
        ret     $31,($26),1

Lend:   stq     $24,-8($16)             # lowest result limb
        ret     $31,($26),1
        .end    __mpn_lshift
|
173
sysdeps/alpha/alphaev5/rshift.s
Normal file
173
sysdeps/alpha/alphaev5/rshift.s
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
# Alpha EV5 __mpn_rshift --
|
||||||
|
|
||||||
|
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r16
|
||||||
|
# s1_ptr r17
|
||||||
|
# size r18
|
||||||
|
# cnt r19
|
||||||
|
|
||||||
|
# This code runs at 4.25 cycles/limb on the EV5.
|
||||||
|
|
||||||
|
# __mpn_rshift (alphaev5 variant): shift a limb vector right by cnt bits,
# working from the least significant limb upwards.
# Register use in this routine:
#   $16 = res_ptr, $17 = s1_ptr, $18 = limb counter, $19 = cnt
#   $20 = 0 - cnt, used as the left-shift count for the bits that cross
#         into the next lower limb
#   $28 = number of limbs handled one at a time in Loop0
#   $0  = function result: the bits shifted out of the first limb
#   $1-$4 = loaded source limbs; $5-$8 = left-shifted parts / results;
#   $21-$24 = right-shifted parts carried to the next higher result limb
        .set    noreorder
        .set    noat
        .text
        .align  3
        .globl  __mpn_rshift
        .ent    __mpn_rshift
__mpn_rshift:
        .frame  $30,0,$26,0

        ldq     $4,0($17)               # load first limb
        subq    $31,$19,$20             # $20 = -cnt
        subq    $18,1,$18
        and     $18,3,$28               # number of limbs in first loop
        sll     $4,$20,$0               # compute function result

        beq     $28,L0
        subq    $18,$28,$18

# First loop: one result limb per iteration, $28 iterations.
        .align  3
Loop0:
        ldq     $3,8($17)               # next higher source limb
        addq    $16,8,$16
        srl     $4,$19,$5               # low part from current limb
        addq    $17,8,$17
        subq    $28,1,$28
        sll     $3,$20,$6               # high part from next limb
        bis     $3,$3,$4                # next limb becomes current
        bis     $5,$6,$8
        stq     $8,-8($16)
        bne     $28,Loop0

L0:
        srl     $4,$19,$24              # pending low part of current limb
        beq     $18,Lend
# warm up phase 1
        ldq     $1,8($17)
        subq    $18,4,$18
        ldq     $2,16($17)
        ldq     $3,24($17)
        ldq     $4,32($17)
        beq     $18,Lcool1              # only 4 limbs left: short cool-down
# warm up phase 2
        sll     $1,$20,$7
        srl     $1,$19,$21
        sll     $2,$20,$8
        ldq     $1,40($17)
        srl     $2,$19,$22
        ldq     $2,48($17)
        sll     $3,$20,$5
        bis     $7,$24,$7
        srl     $3,$19,$23
        bis     $8,$21,$8
        sll     $4,$20,$6
        ldq     $3,56($17)
        srl     $4,$19,$24
        ldq     $4,64($17)
        subq    $18,4,$18
        beq     $18,Lcool2              # 8 limbs in flight: long cool-down
        .align  4
# main loop
Loop:
        stq     $7,0($16)
        bis     $5,$22,$5
        stq     $8,8($16)
        bis     $6,$23,$6

        sll     $1,$20,$7
        subq    $18,4,$18
        srl     $1,$19,$21
        unop    # ldq   $31,-96($17)

        sll     $2,$20,$8
        ldq     $1,72($17)
        srl     $2,$19,$22
        ldq     $2,80($17)

        stq     $5,16($16)
        bis     $7,$24,$7
        stq     $6,24($16)
        bis     $8,$21,$8

        sll     $3,$20,$5
        unop    # ldq   $31,-96($17)
        srl     $3,$19,$23
        addq    $16,32,$16

        sll     $4,$20,$6
        ldq     $3,88($17)
        srl     $4,$19,$24
        ldq     $4,96($17)

        addq    $17,32,$17
        bne     $18,Loop
        unop
        unop
# cool down phase 2/1
Lcool2:
        stq     $7,0($16)
        bis     $5,$22,$5
        stq     $8,8($16)
        bis     $6,$23,$6
        sll     $1,$20,$7
        srl     $1,$19,$21
        sll     $2,$20,$8
        srl     $2,$19,$22
        stq     $5,16($16)
        bis     $7,$24,$7
        stq     $6,24($16)
        bis     $8,$21,$8
        sll     $3,$20,$5
        srl     $3,$19,$23
        sll     $4,$20,$6
        srl     $4,$19,$24
# cool down phase 2/2
        stq     $7,32($16)
        bis     $5,$22,$5
        stq     $8,40($16)
        bis     $6,$23,$6
        stq     $5,48($16)
        stq     $6,56($16)
# cool down phase 2/3
        stq     $24,64($16)             # highest result limb
        ret     $31,($26),1

# cool down phase 1/1
Lcool1:
        sll     $1,$20,$7
        srl     $1,$19,$21
        sll     $2,$20,$8
        srl     $2,$19,$22
        sll     $3,$20,$5
        bis     $7,$24,$7
        srl     $3,$19,$23
        bis     $8,$21,$8
        sll     $4,$20,$6
        srl     $4,$19,$24
# cool down phase 1/2
        stq     $7,0($16)
        bis     $5,$22,$5
        stq     $8,8($16)
        bis     $6,$23,$6
        stq     $5,16($16)
        stq     $6,24($16)
        stq     $24,32($16)             # highest result limb
        ret     $31,($26),1

Lend:
        stq     $24,0($16)              # highest result limb
        ret     $31,($26),1
        .end    __mpn_rshift
|
108
sysdeps/alpha/lshift.s
Normal file
108
sysdeps/alpha/lshift.s
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
# Alpha 21064 __mpn_lshift --
|
||||||
|
|
||||||
|
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r16
|
||||||
|
# s1_ptr r17
|
||||||
|
# size r18
|
||||||
|
# cnt r19
|
||||||
|
|
||||||
|
# This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
|
||||||
|
# it would take 4 cycles/limb. It should be possible to get down to 3
|
||||||
|
# cycles/limb since both ldq and stq can be paired with the other used
|
||||||
|
# instructions. But there are many restrictions in the 21064 pipeline that
|
||||||
|
# make it hard, if not impossible, to get down to 3 cycles/limb:
|
||||||
|
|
||||||
|
# 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
|
||||||
|
# 2. Only aligned instruction pairs can be paired.
|
||||||
|
# 3. The store buffer or silo might not be able to deal with the bandwidth.
|
||||||
|
|
||||||
|
# __mpn_lshift (21064 variant): shift a limb vector left by cnt bits,
# working from the most significant limb downwards.
# Register use in this routine:
#   $16 = res_ptr (moved to the end of RES), $17 = s1_ptr (moved to the
#   end of s1, then one limb down), $18 = limb counter, $19 = cnt
#   $7  = 0 - cnt, used as the right-shift count for the bits that cross
#         into the next higher limb
#   $20 = number of limbs handled one at a time in Loop0
#   $0  = function result: the bits shifted out of the top limb
#   $3,$4 = source limbs; $1,$5 = left-shifted parts;
#   $2,$6 = right-shifted parts; $8 = assembled result limb
        .set    noreorder
        .set    noat
        .text
        .align  3
        .globl  __mpn_lshift
        .ent    __mpn_lshift
__mpn_lshift:
        .frame  $30,0,$26,0

        s8addq  $18,$17,$17             # make r17 point at end of s1
        ldq     $4,-8($17)              # load first limb
        subq    $17,8,$17
        subq    $31,$19,$7              # $7 = -cnt
        s8addq  $18,$16,$16             # make r16 point at end of RES
        subq    $18,1,$18
        and     $18,3,$20               # number of limbs in first loop
        srl     $4,$7,$0                # compute function result

        beq     $20,L0
        subq    $18,$20,$18

# First loop: one result limb per iteration, $20 iterations.
        .align  3
Loop0:
        ldq     $3,-8($17)              # next lower source limb
        subq    $16,8,$16
        subq    $17,8,$17
        subq    $20,1,$20
        sll     $4,$19,$5               # high part from current limb
        srl     $3,$7,$6                # low part from next limb
        or      $3,$3,$4                # next limb becomes current
        or      $5,$6,$8
        stq     $8,0($16)
        bne     $20,Loop0

L0:
        beq     $18,Lend

# Main loop: four result limbs per iteration, alternating $3 and $4 as
# the current source limb.
        .align  3
Loop:
        ldq     $3,-8($17)
        subq    $16,32,$16
        subq    $18,4,$18
        sll     $4,$19,$5
        srl     $3,$7,$6

        ldq     $4,-16($17)
        sll     $3,$19,$1
        or      $5,$6,$8
        stq     $8,24($16)
        srl     $4,$7,$2

        ldq     $3,-24($17)
        sll     $4,$19,$5
        or      $1,$2,$8
        stq     $8,16($16)
        srl     $3,$7,$6

        ldq     $4,-32($17)
        sll     $3,$19,$1
        or      $5,$6,$8
        stq     $8,8($16)
        srl     $4,$7,$2

        subq    $17,32,$17
        or      $1,$2,$8
        stq     $8,0($16)

        bgt     $18,Loop

# Lowest result limb: only the left-shifted part of the last source limb.
Lend:
        sll     $4,$19,$8
        stq     $8,-8($16)
        ret     $31,($26),1
        .end    __mpn_lshift
|
84
sysdeps/alpha/mul_1.s
Normal file
84
sysdeps/alpha/mul_1.s
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
# Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||||
|
# the result in a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r16
|
||||||
|
# s1_ptr r17
|
||||||
|
# size r18
|
||||||
|
# s2_limb r19
|
||||||
|
|
||||||
|
# This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
|
||||||
|
|
||||||
|
# To improve performance for long multiplications, we would use
|
||||||
|
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
|
||||||
|
# these instructions without slowing down the general code: 1. We can
|
||||||
|
# only have two prefetches in operation at any time in the Alpha
|
||||||
|
# architecture. 2. There will seldom be any special alignment
|
||||||
|
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
|
||||||
|
# loop into an inner and outer loop, having the inner loop handle
|
||||||
|
# exactly one prefetch block?
|
||||||
|
|
||||||
|
# __mpn_mul_1: res[i] = low word of (s1[i] * s2_limb + carry), carrying
# the high product word into the next limb.  Register use in this routine:
#   $16 = res_ptr, $17 = s1_ptr, $18 = size, $19 = s2_limb
#   $2 = current s1 limb, $3 = low product / stored result
#   $0,$4 = high product words and carries; $0 holds the value left at ret
        .set    noreorder
        .set    noat
        .text
        .align  3
        .globl  __mpn_mul_1
        .ent    __mpn_mul_1 2
__mpn_mul_1:
        .frame  $30,0,$26

        ldq     $2,0($17)               # $2 = s1_limb
        subq    $18,1,$18               # size--
        mulq    $2,$19,$3               # $3 = prod_low
        bic     $31,$31,$4              # clear cy_limb
        umulh   $2,$19,$0               # $0 = prod_high
        beq     $18,Lend1               # jump if size was == 1
        ldq     $2,8($17)               # $2 = s1_limb
        subq    $18,1,$18               # size--
        stq     $3,0($16)               # first result limb = prod_low
        beq     $18,Lend2               # jump if size was == 2

        .align  3
Loop:
        mulq    $2,$19,$3               # $3 = prod_low
        addq    $4,$0,$0                # cy_limb = cy_limb + 'cy'
        subq    $18,1,$18               # size--
        umulh   $2,$19,$4               # $4 = cy_limb
        ldq     $2,16($17)              # $2 = s1_limb
        addq    $17,8,$17               # s1_ptr++
        addq    $3,$0,$3                # $3 = cy_limb + prod_low
        stq     $3,8($16)
        cmpult  $3,$0,$0                # $0 = carry from (cy_limb + prod_low)
        addq    $16,8,$16               # res_ptr++
        bne     $18,Loop

# Last limb when size >= 2: same step as the loop body, without the
# look-ahead load and pointer updates.
Lend2:
        mulq    $2,$19,$3               # $3 = prod_low
        addq    $4,$0,$0                # cy_limb = cy_limb + 'cy'
        umulh   $2,$19,$4               # $4 = cy_limb
        addq    $3,$0,$3                # $3 = cy_limb + prod_low
        cmpult  $3,$0,$0                # $0 = carry from (cy_limb + prod_low)
        stq     $3,8($16)
        addq    $4,$0,$0                # cy_limb = prod_high + cy
        ret     $31,($26),1

# size == 1: store the single low product; $0 already holds prod_high.
Lend1:
        stq     $3,0($16)
        ret     $31,($26),1

        .end    __mpn_mul_1
|
106
sysdeps/alpha/rshift.s
Normal file
106
sysdeps/alpha/rshift.s
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
# Alpha 21064 __mpn_rshift --
|
||||||
|
|
||||||
|
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r16
|
||||||
|
# s1_ptr r17
|
||||||
|
# size r18
|
||||||
|
# cnt r19
|
||||||
|
|
||||||
|
# This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
|
||||||
|
# it would take 4 cycles/limb. It should be possible to get down to 3
|
||||||
|
# cycles/limb since both ldq and stq can be paired with the other used
|
||||||
|
# instructions. But there are many restrictions in the 21064 pipeline that
|
||||||
|
# make it hard, if not impossible, to get down to 3 cycles/limb:
|
||||||
|
|
||||||
|
# 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
|
||||||
|
# 2. Only aligned instruction pairs can be paired.
|
||||||
|
# 3. The store buffer or silo might not be able to deal with the bandwidth.
|
||||||
|
|
||||||
|
# __mpn_rshift (21064 variant): shift a limb vector right by cnt bits,
# working from the least significant limb upwards.
# Register use in this routine:
#   $16 = res_ptr, $17 = s1_ptr (advanced past the first limb),
#   $18 = limb counter, $19 = cnt
#   $7  = 0 - cnt, used as the left-shift count for the bits that cross
#         into the next lower limb
#   $20 = number of limbs handled one at a time in Loop0
#   $0  = function result: the bits shifted out of the first limb
#   $3,$4 = source limbs; $1,$5 = right-shifted parts;
#   $2,$6 = left-shifted parts; $8 = assembled result limb
        .set    noreorder
        .set    noat
        .text
        .align  3
        .globl  __mpn_rshift
        .ent    __mpn_rshift
__mpn_rshift:
        .frame  $30,0,$26,0

        ldq     $4,0($17)               # load first limb
        addq    $17,8,$17
        subq    $31,$19,$7              # $7 = -cnt
        subq    $18,1,$18
        and     $18,3,$20               # number of limbs in first loop
        sll     $4,$7,$0                # compute function result

        beq     $20,L0
        subq    $18,$20,$18

# First loop: one result limb per iteration, $20 iterations.
        .align  3
Loop0:
        ldq     $3,0($17)               # next higher source limb
        addq    $16,8,$16
        addq    $17,8,$17
        subq    $20,1,$20
        srl     $4,$19,$5               # low part from current limb
        sll     $3,$7,$6                # high part from next limb
        or      $3,$3,$4                # next limb becomes current
        or      $5,$6,$8
        stq     $8,-8($16)
        bne     $20,Loop0

L0:
        beq     $18,Lend

# Main loop: four result limbs per iteration, alternating $3 and $4 as
# the current source limb.
        .align  3
Loop:
        ldq     $3,0($17)
        addq    $16,32,$16
        subq    $18,4,$18
        srl     $4,$19,$5
        sll     $3,$7,$6

        ldq     $4,8($17)
        srl     $3,$19,$1
        or      $5,$6,$8
        stq     $8,-32($16)
        sll     $4,$7,$2

        ldq     $3,16($17)
        srl     $4,$19,$5
        or      $1,$2,$8
        stq     $8,-24($16)
        sll     $3,$7,$6

        ldq     $4,24($17)
        srl     $3,$19,$1
        or      $5,$6,$8
        stq     $8,-16($16)
        sll     $4,$7,$2

        addq    $17,32,$17
        or      $1,$2,$8
        stq     $8,-8($16)

        bgt     $18,Loop

# Highest result limb: only the right-shifted part of the last source limb.
Lend:
        srl     $4,$19,$8
        stq     $8,0($16)
        ret     $31,($26),1
        .end    __mpn_rshift
|
119
sysdeps/alpha/sub_n.s
Normal file
119
sysdeps/alpha/sub_n.s
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
# Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||||
|
# store difference in a third limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $16
|
||||||
|
# s1_ptr $17
|
||||||
|
# s2_ptr $18
|
||||||
|
# size $19
|
||||||
|
|
||||||
|
.set noreorder
|
||||||
|
.set noat
|
||||||
|
.text
|
||||||
|
.align 3
|
||||||
|
.globl __mpn_sub_n
|
||||||
|
.ent __mpn_sub_n
|
||||||
|
__mpn_sub_n: # store s1 - s2 (size limbs each) via res_ptr; $0 holds the borrow out at return
|
||||||
|
.frame $30,0,$26,0
|
||||||
|
|
||||||
|
ldq $3,0($17) # first s1 limb
|
||||||
|
ldq $4,0($18) # first s2 limb
|
||||||
|
|
||||||
|
subq $19,1,$19 # the last limb is handled separately at .Lend
|
||||||
|
and $19,4-1,$2 # number of limbs in first loop
|
||||||
|
bis $31,$31,$0 # $0 = 0: no borrow in
|
||||||
|
beq $2,.L0 # if multiple of 4 limbs, skip first loop
|
||||||
|
|
||||||
|
subq $19,$2,$19 # count left for the unrolled loop, a multiple of 4
|
||||||
|
|
||||||
|
.Loop0: subq $2,1,$2
|
||||||
|
ldq $5,8($17) # fetch the next s1 limb ahead of use
|
||||||
|
addq $4,$0,$4 # s2 limb + borrow in
|
||||||
|
ldq $6,8($18) # fetch the next s2 limb ahead of use
|
||||||
|
cmpult $4,$0,$1 # $1 = 1 if that addition wrapped
|
||||||
|
subq $3,$4,$4 # difference limb
|
||||||
|
cmpult $3,$4,$0 # $0 = 1 if the subtraction borrowed
|
||||||
|
stq $4,0($16)
|
||||||
|
or $0,$1,$0 # borrow out = either of the two
|
||||||
|
|
||||||
|
addq $17,8,$17
|
||||||
|
addq $18,8,$18
|
||||||
|
bis $5,$5,$3 # move the prefetched limbs into place
|
||||||
|
bis $6,$6,$4
|
||||||
|
addq $16,8,$16
|
||||||
|
bne $2,.Loop0
|
||||||
|
|
||||||
|
.L0: beq $19,.Lend # no groups of four left
|
||||||
|
|
||||||
|
.align 3
|
||||||
|
.Loop: subq $19,4,$19 # four limbs per iteration; pairs ($3,$4) and ($5,$6) alternate
|
||||||
|
|
||||||
|
ldq $5,8($17)
|
||||||
|
addq $4,$0,$4
|
||||||
|
ldq $6,8($18)
|
||||||
|
cmpult $4,$0,$1
|
||||||
|
subq $3,$4,$4
|
||||||
|
cmpult $3,$4,$0
|
||||||
|
stq $4,0($16)
|
||||||
|
or $0,$1,$0
|
||||||
|
|
||||||
|
ldq $3,16($17)
|
||||||
|
addq $6,$0,$6
|
||||||
|
ldq $4,16($18)
|
||||||
|
cmpult $6,$0,$1
|
||||||
|
subq $5,$6,$6
|
||||||
|
cmpult $5,$6,$0
|
||||||
|
stq $6,8($16)
|
||||||
|
or $0,$1,$0
|
||||||
|
|
||||||
|
ldq $5,24($17)
|
||||||
|
addq $4,$0,$4
|
||||||
|
ldq $6,24($18)
|
||||||
|
cmpult $4,$0,$1
|
||||||
|
subq $3,$4,$4
|
||||||
|
cmpult $3,$4,$0
|
||||||
|
stq $4,16($16)
|
||||||
|
or $0,$1,$0
|
||||||
|
|
||||||
|
ldq $3,32($17) # first limbs for the next iteration (or for .Lend)
|
||||||
|
addq $6,$0,$6
|
||||||
|
ldq $4,32($18)
|
||||||
|
cmpult $6,$0,$1
|
||||||
|
subq $5,$6,$6
|
||||||
|
cmpult $5,$6,$0
|
||||||
|
stq $6,24($16)
|
||||||
|
or $0,$1,$0
|
||||||
|
|
||||||
|
addq $17,32,$17
|
||||||
|
addq $18,32,$18
|
||||||
|
addq $16,32,$16
|
||||||
|
bne $19,.Loop
|
||||||
|
|
||||||
|
.Lend: addq $4,$0,$4 # final limb: same borrow sequence, no further loads
|
||||||
|
cmpult $4,$0,$1
|
||||||
|
subq $3,$4,$4
|
||||||
|
cmpult $3,$4,$0
|
||||||
|
stq $4,0($16)
|
||||||
|
or $0,$1,$0 # $0 = borrow out of the whole subtraction
|
||||||
|
ret $31,($26),1
|
||||||
|
|
||||||
|
.end __mpn_sub_n
|
100
sysdeps/alpha/submul_1.s
Normal file
100
sysdeps/alpha/submul_1.s
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
# Alpha 21064 __mpn_submul_1 -- Multiply a limb vector with a limb and
|
||||||
|
# subtract the result from a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r16
|
||||||
|
# s1_ptr r17
|
||||||
|
# size r18
|
||||||
|
# s2_limb r19
|
||||||
|
|
||||||
|
# This code runs at 42 cycles/limb on the 21064.
|
||||||
|
|
||||||
|
# To improve performance for long multiplications, we would use
|
||||||
|
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
|
||||||
|
# these instructions without slowing down the general code: 1. We can
|
||||||
|
# only have two prefetches in operation at any time in the Alpha
|
||||||
|
# architecture. 2. There will seldom be any special alignment
|
||||||
|
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
|
||||||
|
# loop into an inner and outer loop, having the inner loop handle
|
||||||
|
# exactly one prefetch block?
|
||||||
|
|
||||||
|
.set noreorder
|
||||||
|
.set noat
|
||||||
|
.text
|
||||||
|
.align 3
|
||||||
|
.globl __mpn_submul_1
|
||||||
|
.ent __mpn_submul_1 2
|
||||||
|
__mpn_submul_1: # subtract s1 * s2_limb from the limbs at res_ptr in place; $0 = high limb (cy_limb) at return
|
||||||
|
.frame $30,0,$26
|
||||||
|
|
||||||
|
ldq $2,0($17) # $2 = s1_limb
|
||||||
|
addq $17,8,$17 # s1_ptr++
|
||||||
|
subq $18,1,$18 # size--
|
||||||
|
mulq $2,$19,$3 # $3 = prod_low
|
||||||
|
ldq $5,0($16) # $5 = *res_ptr
|
||||||
|
umulh $2,$19,$0 # $0 = prod_high
|
||||||
|
beq $18,Lend1 # jump if size was == 1
|
||||||
|
ldq $2,0($17) # $2 = s1_limb
|
||||||
|
addq $17,8,$17 # s1_ptr++
|
||||||
|
subq $18,1,$18 # size--
|
||||||
|
subq $5,$3,$3 # $3 = *res_ptr - prod_low
|
||||||
|
cmpult $5,$3,$4 # $4 = borrow from that subtraction
|
||||||
|
stq $3,0($16)
|
||||||
|
addq $16,8,$16 # res_ptr++
|
||||||
|
beq $18,Lend2 # jump if size was == 2
|
||||||
|
|
||||||
|
.align 3
|
||||||
|
Loop: mulq $2,$19,$3 # $3 = prod_low
|
||||||
|
ldq $5,0($16) # $5 = *res_ptr
|
||||||
|
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||||
|
subq $18,1,$18 # size--
|
||||||
|
umulh $2,$19,$4 # $4 = cy_limb
|
||||||
|
ldq $2,0($17) # $2 = s1_limb
|
||||||
|
addq $17,8,$17 # s1_ptr++
|
||||||
|
addq $3,$0,$3 # $3 = cy_limb + prod_low
|
||||||
|
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
|
||||||
|
subq $5,$3,$3 # $3 = *res_ptr - (cy_limb + prod_low)
|
||||||
|
cmpult $5,$3,$5 # $5 = borrow from that subtraction
|
||||||
|
stq $3,0($16)
|
||||||
|
addq $16,8,$16 # res_ptr++
|
||||||
|
addq $5,$0,$0 # combine carries
|
||||||
|
bne $18,Loop
|
||||||
|
|
||||||
|
Lend2: mulq $2,$19,$3 # $3 = prod_low
|
||||||
|
ldq $5,0($16) # $5 = *res_ptr
|
||||||
|
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||||
|
umulh $2,$19,$4 # $4 = cy_limb
|
||||||
|
addq $3,$0,$3 # $3 = cy_limb + prod_low
|
||||||
|
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
|
||||||
|
subq $5,$3,$3 # $3 = *res_ptr - (cy_limb + prod_low)
|
||||||
|
cmpult $5,$3,$5 # $5 = borrow from that subtraction
|
||||||
|
stq $3,0($16)
|
||||||
|
addq $5,$0,$0 # combine carries
|
||||||
|
addq $4,$0,$0 # cy_limb = prod_high + cy
|
||||||
|
ret $31,($26),1
|
||||||
|
Lend1: subq $5,$3,$3 # single-limb case: *res_ptr - prod_low
|
||||||
|
cmpult $5,$3,$5 # $5 = borrow from that subtraction
|
||||||
|
stq $3,0($16)
|
||||||
|
addq $0,$5,$0 # result = prod_high + borrow
|
||||||
|
ret $31,($26),1
|
||||||
|
|
||||||
|
.end __mpn_submul_1
|
57
sysdeps/hppa/add_n.s
Normal file
57
sysdeps/hppa/add_n.s
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
; HP-PA __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||||
|
; sum in a third limb vector.
|
||||||
|
|
||||||
|
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; res_ptr gr26
|
||||||
|
; s1_ptr gr25
|
||||||
|
; s2_ptr gr24
|
||||||
|
; size gr23
|
||||||
|
|
||||||
|
; One might want to unroll this as for other processors, but it turns
|
||||||
|
; out that the data cache contention after a store makes such
|
||||||
|
; unrolling useless. We can't come under 5 cycles/limb anyway.
|
||||||
|
|
||||||
|
.code
|
||||||
|
.export __mpn_add_n
|
||||||
|
__mpn_add_n ; store s1 + s2 (size limbs each) via res_ptr; carry out is left in %r28
|
||||||
|
.proc
|
||||||
|
||||||
|
.callinfo frame=0,no_calls
|
||||||
|
.entry
|
||||||
|
|
||||||
|
ldws,ma 4(0,%r25),%r20 ; first s1 limb, then s1_ptr += 4
|
||||||
|
ldws,ma 4(0,%r24),%r19 ; first s2 limb, then s2_ptr += 4
|
||||||
|
|
||||||
|
addib,= -1,%r23,L$end ; check for (SIZE == 1)
|
||||||
|
add %r20,%r19,%r28 ; add first limbs ignoring cy
|
||||||
|
|
||||||
|
L$loop ldws,ma 4(0,%r25),%r20 ; next s1 limb
|
||||||
|
ldws,ma 4(0,%r24),%r19 ; next s2 limb
|
||||||
|
stws,ma %r28,4(0,%r26) ; store the previous sum limb, then res_ptr += 4
|
||||||
|
addib,<> -1,%r23,L$loop ; size--, loop while non-zero
|
||||||
|
addc %r20,%r19,%r28 ; (delay slot) add limbs plus the carry from the previous add
|
||||||
|
|
||||||
|
L$end stws %r28,0(0,%r26) ; store the last sum limb
|
||||||
|
bv 0(%r2) ; return through %r2
|
||||||
|
addc %r0,%r0,%r28 ; (delay slot) %r28 = 0 + 0 + carry = carry out
|
||||||
|
|
||||||
|
.exit
|
||||||
|
.procend
|
101
sysdeps/hppa/hppa1.1/addmul_1.s
Normal file
101
sysdeps/hppa/hppa1.1/addmul_1.s
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
; HP-PA-1.1 __mpn_addmul_1 -- Multiply a limb vector with a limb and
|
||||||
|
; add the result to a second limb vector.
|
||||||
|
|
||||||
|
; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; res_ptr r26
|
||||||
|
; s1_ptr r25
|
||||||
|
; size r24
|
||||||
|
; s2_limb r23
|
||||||
|
|
||||||
|
; This runs at 11 cycles/limb on a PA7000. With the used instructions, it
|
||||||
|
; can not become faster due to data cache contention after a store. On the
|
||||||
|
; PA7100 it runs at 10 cycles/limb, and that can not be improved either,
|
||||||
|
; since only the xmpyu does not need the integer pipeline, so the only
|
||||||
|
; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
|
||||||
|
; on the PA7100.
|
||||||
|
|
||||||
|
; There are some ideas described in mul_1.s that apply to this code too.
|
||||||
|
|
||||||
|
.code
|
||||||
|
.export __mpn_addmul_1
|
||||||
|
__mpn_addmul_1 ; add s1 * s2_limb to the limbs at res_ptr in place; the final high limb is left in %r28
|
||||||
|
.proc
|
||||||
|
||||||
|
.callinfo frame=64,no_calls
|
||||||
|
.entry
|
||||||
|
|
||||||
|
ldo 64(%r30),%r30 ; reserve 64 bytes of stack scratch (released before each return)
|
||||||
|
fldws,ma 4(%r25),%fr5 ; first s1 limb into an FP register, then s1_ptr += 4
|
||||||
|
stw %r23,-16(%r30) ; move s2_limb ...
|
||||||
|
addib,= -1,%r24,L$just_one_limb ; size--, special case when only one limb
|
||||||
|
fldws -16(%r30),%fr4 ; ... into fr4
|
||||||
|
add %r0,%r0,%r0 ; clear carry
|
||||||
|
xmpyu %fr4,%fr5,%fr6 ; first product (64 bits)
|
||||||
|
fldws,ma 4(%r25),%fr7 ; second s1 limb
|
||||||
|
fstds %fr6,-16(%r30) ; spill the product so both words can be loaded into general registers
|
||||||
|
xmpyu %fr4,%fr7,%fr8 ; second product
|
||||||
|
ldw -12(%r30),%r19 ; least significant limb in product
|
||||||
|
ldw -16(%r30),%r28 ; most significant word of the first product
|
||||||
|
|
||||||
|
fstds %fr8,-16(%r30) ; second product now occupies the scratch slot
|
||||||
|
addib,= -1,%r24,L$end ; size--, skip the loop when only two limbs
|
||||||
|
ldw -12(%r30),%r1 ; (delay slot) low word of the second product
|
||||||
|
|
||||||
|
; Main loop
|
||||||
|
L$loop ldws 0(%r26),%r29 ; current result limb
|
||||||
|
fldws,ma 4(%r25),%fr5 ; next s1 limb
|
||||||
|
add %r29,%r19,%r19 ; add the pending product word to it
|
||||||
|
stws,ma %r19,4(%r26) ; store, then res_ptr += 4
|
||||||
|
addc %r28,%r1,%r19 ; next pending word = previous high word + next low word + carry
|
||||||
|
xmpyu %fr4,%fr5,%fr6 ; product for the limb just loaded
|
||||||
|
ldw -16(%r30),%r28 ; high word of the product still in the scratch slot
|
||||||
|
fstds %fr6,-16(%r30) ; replace it with the newest product
|
||||||
|
addc %r0,%r28,%r28 ; fold the carry into that high word
|
||||||
|
addib,<> -1,%r24,L$loop ; size--, loop while non-zero
|
||||||
|
ldw -12(%r30),%r1 ; (delay slot) low word of the newest product
|
||||||
|
|
||||||
|
L$end ldw 0(%r26),%r29 ; drain the pipeline: two result limbs remain
|
||||||
|
add %r29,%r19,%r19
|
||||||
|
stws,ma %r19,4(%r26)
|
||||||
|
addc %r28,%r1,%r19
|
||||||
|
ldw -16(%r30),%r28 ; high word of the last product
|
||||||
|
ldws 0(%r26),%r29
|
||||||
|
addc %r0,%r28,%r28
|
||||||
|
add %r29,%r19,%r19
|
||||||
|
stws,ma %r19,4(%r26)
|
||||||
|
addc %r0,%r28,%r28 ; %r28 = last high word plus the final carry
|
||||||
|
bv 0(%r2) ; return through %r2
|
||||||
|
ldo -64(%r30),%r30 ; (delay slot) release the scratch space
|
||||||
|
|
||||||
|
L$just_one_limb
|
||||||
|
xmpyu %fr4,%fr5,%fr6 ; the only product
|
||||||
|
ldw 0(%r26),%r29 ; the only result limb
|
||||||
|
fstds %fr6,-16(%r30)
|
||||||
|
ldw -12(%r30),%r1 ; low word of the product
|
||||||
|
ldw -16(%r30),%r28 ; high word of the product
|
||||||
|
add %r29,%r1,%r19
|
||||||
|
stw %r19,0(%r26)
|
||||||
|
addc %r0,%r28,%r28 ; %r28 = high word + carry
|
||||||
|
bv 0(%r2)
|
||||||
|
ldo -64(%r30),%r30 ; (delay slot) release the scratch space
|
||||||
|
|
||||||
|
.exit
|
||||||
|
.procend
|
97
sysdeps/hppa/hppa1.1/mul_1.s
Normal file
97
sysdeps/hppa/hppa1.1/mul_1.s
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
; HP-PA-1.1 __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||||
|
; the result in a second limb vector.
|
||||||
|
|
||||||
|
; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; res_ptr r26
|
||||||
|
; s1_ptr r25
|
||||||
|
; size r24
|
||||||
|
; s2_limb r23
|
||||||
|
|
||||||
|
; This runs at 9 cycles/limb on a PA7000. With the used instructions, it can
|
||||||
|
; not become faster due to data cache contention after a store. On the
|
||||||
|
; PA7100 it runs at 7 cycles/limb, and that can not be improved either, since
|
||||||
|
; only the xmpyu does not need the integer pipeline, so the only dual-issue
|
||||||
|
; we will get are addc+xmpyu. Unrolling would not help either CPU.
|
||||||
|
|
||||||
|
; We could use fldds to read two limbs at a time from the S1 array, and that
|
||||||
|
; could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
|
||||||
|
; PA7100, respectively. We don't do that since it does not seem worth the
|
||||||
|
; (alignment) troubles...
|
||||||
|
|
||||||
|
; At least the PA7100 is rumored to be able to deal with cache-misses
|
||||||
|
; without stalling instruction issue. If this is true, and the cache is
|
||||||
|
; actually also lockup-free, we should use a deeper software pipeline, and
|
||||||
|
; load from S1 very early! (The loads and stores to -12(sp) will surely be
|
||||||
|
; in the cache.)
|
||||||
|
|
||||||
|
.code
|
||||||
|
.export __mpn_mul_1
|
||||||
|
__mpn_mul_1 ; store s1 * s2_limb via res_ptr; the final high limb is left in %r28
|
||||||
|
.proc
|
||||||
|
||||||
|
.callinfo frame=64,no_calls
|
||||||
|
.entry
|
||||||
|
|
||||||
|
ldo 64(%r30),%r30 ; reserve 64 bytes of stack scratch (released before each return)
|
||||||
|
fldws,ma 4(%r25),%fr5 ; first s1 limb into an FP register, then s1_ptr += 4
|
||||||
|
stw %r23,-16(%r30) ; move s2_limb ...
|
||||||
|
addib,= -1,%r24,L$just_one_limb ; size--, special case when only one limb
|
||||||
|
fldws -16(%r30),%fr4 ; ... into fr4
|
||||||
|
add %r0,%r0,%r0 ; clear carry
|
||||||
|
xmpyu %fr4,%fr5,%fr6 ; first product (64 bits)
|
||||||
|
fldws,ma 4(%r25),%fr7 ; second s1 limb
|
||||||
|
fstds %fr6,-16(%r30) ; spill the product so both words can be loaded into general registers
|
||||||
|
xmpyu %fr4,%fr7,%fr8 ; second product
|
||||||
|
ldw -12(%r30),%r19 ; least significant limb in product
|
||||||
|
ldw -16(%r30),%r28 ; most significant word of the first product
|
||||||
|
|
||||||
|
fstds %fr8,-16(%r30) ; second product now occupies the scratch slot
|
||||||
|
addib,= -1,%r24,L$end ; size--, skip the loop when only two limbs
|
||||||
|
ldw -12(%r30),%r1 ; (delay slot) low word of the second product
|
||||||
|
|
||||||
|
; Main loop
|
||||||
|
L$loop fldws,ma 4(%r25),%fr5 ; next s1 limb
|
||||||
|
stws,ma %r19,4(%r26) ; store the finished result limb, then res_ptr += 4
|
||||||
|
addc %r28,%r1,%r19 ; next result limb = previous high word + next low word + carry
|
||||||
|
xmpyu %fr4,%fr5,%fr6 ; product for the limb just loaded
|
||||||
|
ldw -16(%r30),%r28 ; high word of the product still in the scratch slot
|
||||||
|
fstds %fr6,-16(%r30) ; replace it with the newest product
|
||||||
|
addib,<> -1,%r24,L$loop ; size--, loop while non-zero
|
||||||
|
ldw -12(%r30),%r1 ; (delay slot) low word of the newest product
|
||||||
|
|
||||||
|
L$end stws,ma %r19,4(%r26) ; drain the pipeline: two result limbs remain
|
||||||
|
addc %r28,%r1,%r19
|
||||||
|
ldw -16(%r30),%r28 ; high word of the last product
|
||||||
|
stws,ma %r19,4(%r26)
|
||||||
|
addc %r0,%r28,%r28 ; %r28 = last high word plus the final carry
|
||||||
|
bv 0(%r2) ; return through %r2
|
||||||
|
ldo -64(%r30),%r30 ; (delay slot) release the scratch space
|
||||||
|
|
||||||
|
L$just_one_limb
|
||||||
|
xmpyu %fr4,%fr5,%fr6 ; the only product
|
||||||
|
fstds %fr6,-16(%r30)
|
||||||
|
ldw -16(%r30),%r28 ; high word of the product
|
||||||
|
ldo -64(%r30),%r30 ; release the scratch space
|
||||||
|
bv 0(%r2)
|
||||||
|
fstws %fr6R,0(%r26) ; (delay slot) store the right half of fr6 as the result limb
|
||||||
|
|
||||||
|
.exit
|
||||||
|
.procend
|
110
sysdeps/hppa/hppa1.1/submul_1.s
Normal file
110
sysdeps/hppa/hppa1.1/submul_1.s
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
; HP-PA-1.1 __mpn_submul_1 -- Multiply a limb vector with a limb and
|
||||||
|
; subtract the result from a second limb vector.
|
||||||
|
|
||||||
|
; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; res_ptr r26
|
||||||
|
; s1_ptr r25
|
||||||
|
; size r24
|
||||||
|
; s2_limb r23
|
||||||
|
|
||||||
|
; This runs at 12 cycles/limb on a PA7000. With the used instructions, it
|
||||||
|
; can not become faster due to data cache contention after a store. On the
|
||||||
|
; PA7100 it runs at 11 cycles/limb, and that can not be improved either,
|
||||||
|
; since only the xmpyu does not need the integer pipeline, so the only
|
||||||
|
; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
|
||||||
|
; on the PA7100.
|
||||||
|
|
||||||
|
; There are some ideas described in mul_1.s that apply to this code too.
|
||||||
|
|
||||||
|
; It seems possible to make this run as fast as __mpn_addmul_1, if we use
|
||||||
|
; sub,>>= %r29,%r19,%r22
|
||||||
|
; addi 1,%r28,%r28
|
||||||
|
; but that requires reworking the hairy software pipeline...
|
||||||
|
|
||||||
|
.code
|
||||||
|
.export __mpn_submul_1
|
||||||
|
__mpn_submul_1 ; subtract s1 * s2_limb from the limbs at res_ptr in place; the final high limb is left in %r28
|
||||||
|
.proc
|
||||||
|
||||||
|
.callinfo frame=64,no_calls
|
||||||
|
.entry
|
||||||
|
|
||||||
|
ldo 64(%r30),%r30 ; reserve 64 bytes of stack scratch (released before each return)
|
||||||
|
fldws,ma 4(%r25),%fr5 ; first s1 limb into an FP register, then s1_ptr += 4
|
||||||
|
stw %r23,-16(%r30) ; move s2_limb ...
|
||||||
|
addib,= -1,%r24,L$just_one_limb ; size--, special case when only one limb
|
||||||
|
fldws -16(%r30),%fr4 ; ... into fr4
|
||||||
|
add %r0,%r0,%r0 ; clear carry
|
||||||
|
xmpyu %fr4,%fr5,%fr6 ; first product (64 bits)
|
||||||
|
fldws,ma 4(%r25),%fr7 ; second s1 limb
|
||||||
|
fstds %fr6,-16(%r30) ; spill the product so both words can be loaded into general registers
|
||||||
|
xmpyu %fr4,%fr7,%fr8 ; second product
|
||||||
|
ldw -12(%r30),%r19 ; least significant limb in product
|
||||||
|
ldw -16(%r30),%r28 ; most significant word of the first product
|
||||||
|
|
||||||
|
fstds %fr8,-16(%r30) ; second product now occupies the scratch slot
|
||||||
|
addib,= -1,%r24,L$end ; size--, skip the loop when only two limbs
|
||||||
|
ldw -12(%r30),%r1 ; (delay slot) low word of the second product
|
||||||
|
|
||||||
|
; Main loop
|
||||||
|
L$loop ldws 0(%r26),%r29 ; current result limb
|
||||||
|
fldws,ma 4(%r25),%fr5 ; next s1 limb
|
||||||
|
sub %r29,%r19,%r22 ; result limb - pending product word
|
||||||
|
add %r22,%r19,%r0 ; sum discarded into %r0; leaves carry set exactly when the sub borrowed
|
||||||
|
stws,ma %r22,4(%r26) ; store, then res_ptr += 4
|
||||||
|
addc %r28,%r1,%r19 ; next pending word = previous high word + next low word + carry
|
||||||
|
xmpyu %fr4,%fr5,%fr6 ; product for the limb just loaded
|
||||||
|
ldw -16(%r30),%r28 ; high word of the product still in the scratch slot
|
||||||
|
fstds %fr6,-16(%r30) ; replace it with the newest product
|
||||||
|
addc %r0,%r28,%r28 ; fold the carry into that high word
|
||||||
|
addib,<> -1,%r24,L$loop ; size--, loop while non-zero
|
||||||
|
ldw -12(%r30),%r1 ; (delay slot) low word of the newest product
|
||||||
|
|
||||||
|
L$end ldw 0(%r26),%r29 ; drain the pipeline: two result limbs remain
|
||||||
|
sub %r29,%r19,%r22
|
||||||
|
add %r22,%r19,%r0 ; regenerate the borrow as a carry
|
||||||
|
stws,ma %r22,4(%r26)
|
||||||
|
addc %r28,%r1,%r19
|
||||||
|
ldw -16(%r30),%r28 ; high word of the last product
|
||||||
|
ldws 0(%r26),%r29
|
||||||
|
addc %r0,%r28,%r28
|
||||||
|
sub %r29,%r19,%r22
|
||||||
|
add %r22,%r19,%r0 ; regenerate the borrow as a carry
|
||||||
|
stws,ma %r22,4(%r26)
|
||||||
|
addc %r0,%r28,%r28 ; %r28 = last high word plus the final borrow
|
||||||
|
bv 0(%r2) ; return through %r2
|
||||||
|
ldo -64(%r30),%r30 ; (delay slot) release the scratch space
|
||||||
|
|
||||||
|
L$just_one_limb
|
||||||
|
xmpyu %fr4,%fr5,%fr6 ; the only product
|
||||||
|
ldw 0(%r26),%r29 ; the only result limb
|
||||||
|
fstds %fr6,-16(%r30)
|
||||||
|
ldw -12(%r30),%r1 ; low word of the product
|
||||||
|
ldw -16(%r30),%r28 ; high word of the product
|
||||||
|
sub %r29,%r1,%r22
|
||||||
|
add %r22,%r1,%r0 ; regenerate the borrow as a carry
|
||||||
|
stw %r22,0(%r26)
|
||||||
|
addc %r0,%r28,%r28 ; %r28 = high word + borrow
|
||||||
|
bv 0(%r2)
|
||||||
|
ldo -64(%r30),%r30 ; (delay slot) release the scratch space
|
||||||
|
|
||||||
|
.exit
|
||||||
|
.procend
|
74
sysdeps/hppa/hppa1.1/udiv_qrnnd.s
Normal file
74
sysdeps/hppa/hppa1.1/udiv_qrnnd.s
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
; HP-PA __udiv_qrnnd division support, used from longlong.h.
|
||||||
|
; This version runs fast on PA 7000 and later.
|
||||||
|
|
||||||
|
; Copyright (C) 1993, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; rem_ptr gr26
|
||||||
|
; n1 gr25
|
||||||
|
; n0 gr24
|
||||||
|
; d gr23
|
||||||
|
|
||||||
|
.code
|
||||||
|
L$0000 .word 0x43f00000 ; high word of a double constant; 0x43f0000000000000 is 2^64
|
||||||
|
.word 0x0 ; low word of the same constant
|
||||||
|
.export __udiv_qrnnd
|
||||||
|
__udiv_qrnnd ; divide n1:n0 by d using FP divide; quotient left in %r28, remainder stored via rem_ptr
|
||||||
|
.proc
|
||||||
|
.callinfo frame=64,no_calls
|
||||||
|
.entry
|
||||||
|
ldo 64(%r30),%r30 ; reserve 64 bytes of stack scratch
|
||||||
|
|
||||||
|
stws %r25,-16(0,%r30) ; n_hi
|
||||||
|
stws %r24,-12(0,%r30) ; n_lo
|
||||||
|
ldil L'L$0000,%r19 ; %r19 = address of the constant (left part)
|
||||||
|
ldo R'L$0000(%r19),%r19 ; ... plus right part
|
||||||
|
fldds -16(0,%r30),%fr5 ; fr5 = n1:n0 as one 64-bit integer
|
||||||
|
stws %r23,-12(0,%r30) ; put d where it can be loaded into an FP register
|
||||||
|
comib,<= 0,%r25,L$1 ; skip the fix-up when n1 is non-negative as a signed word
|
||||||
|
fcnvxf,dbl,dbl %fr5,%fr5 ; (delay slot) convert the 64-bit integer to double
|
||||||
|
fldds 0(0,%r19),%fr4 ; load the constant at L$0000
|
||||||
|
fadd,dbl %fr4,%fr5,%fr5 ; add it to undo the signed reading of a value with the top bit set
|
||||||
|
L$1
|
||||||
|
fcpy,sgl %fr0,%fr6L ; presumably zeroes the left half of fr6 (fr0 as a source) -- confirm
|
||||||
|
fldws -12(0,%r30),%fr6R ; right half of fr6 = d
|
||||||
|
fcnvxf,dbl,dbl %fr6,%fr4 ; fr4 = d as a double
|
||||||
|
|
||||||
|
fdiv,dbl %fr5,%fr4,%fr5 ; fr5 = n / d
|
||||||
|
|
||||||
|
fcnvfx,dbl,dbl %fr5,%fr4 ; convert the quotient back to a 64-bit integer
|
||||||
|
fstws %fr4R,-16(%r30) ; spill the low word of the quotient
|
||||||
|
xmpyu %fr4R,%fr6R,%fr6 ; fr6 = q * d
|
||||||
|
ldws -16(%r30),%r28 ; %r28 = q
|
||||||
|
fstds %fr6,-16(0,%r30) ; spill q * d
|
||||||
|
ldws -12(0,%r30),%r21 ; low word of q * d
|
||||||
|
ldws -16(0,%r30),%r20 ; high word of q * d
|
||||||
|
sub %r24,%r21,%r22 ; low word of n - q*d: the candidate remainder
|
||||||
|
subb %r25,%r20,%r19 ; high word of n - q*d, with borrow
|
||||||
|
comib,= 0,%r19,L$2 ; high word zero: q and the remainder stand as computed
|
||||||
|
ldo -64(%r30),%r30 ; (delay slot) release the scratch space
|
||||||
|
|
||||||
|
add %r22,%r23,%r22 ; otherwise add d back to the remainder ...
|
||||||
|
ldo -1(%r28),%r28 ; ... and decrement q
|
||||||
|
L$2 bv 0(%r2) ; return through %r2
|
||||||
|
stws %r22,0(0,%r26) ; (delay slot) store the remainder via rem_ptr
|
||||||
|
|
||||||
|
.exit
|
||||||
|
.procend
|
65
sysdeps/hppa/lshift.s
Normal file
65
sysdeps/hppa/lshift.s
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
; HP-PA __mpn_lshift -- Shift a limb vector left by cnt bits.
|
||||||
|
|
||||||
|
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; res_ptr gr26
|
||||||
|
; s_ptr gr25
|
||||||
|
; size gr24
|
||||||
|
; cnt gr23
|
||||||
|
|
||||||
|
.code
|
||||||
|
.export __mpn_lshift
|
||||||
|
__mpn_lshift ; shift size limbs at s_ptr left by cnt bits into res_ptr; the bits shifted out are left in %r28
|
||||||
|
.proc
|
||||||
|
||||||
|
.callinfo frame=64,no_calls
|
||||||
|
.entry
|
||||||
|
|
||||||
|
sh2add %r24,%r25,%r25 ; s_ptr += 4*size: work from the most significant end downwards
|
||||||
|
sh2add %r24,%r26,%r26 ; res_ptr += 4*size
|
||||||
|
ldws,mb -4(0,%r25),%r22 ; s_ptr -= 4, then load the most significant limb
|
||||||
|
subi 32,%r23,%r1 ; %r1 = 32 - cnt
|
||||||
|
mtsar %r1 ; shift amount register = 32 - cnt, used by every vshd below
|
||||||
|
addib,= -1,%r24,L$0004 ; size--, single-limb case
|
||||||
|
vshd %r0,%r22,%r28 ; compute carry out limb
|
||||||
|
ldws,mb -4(0,%r25),%r29 ; next lower limb
|
||||||
|
addib,= -1,%r24,L$0002 ; size--, two-limb case
|
||||||
|
vshd %r22,%r29,%r20 ; (delay slot) result limb from the pair %r22:%r29
|
||||||
|
|
||||||
|
L$loop ldws,mb -4(0,%r25),%r22 ; two limbs per iteration; %r22 and %r29 alternate
|
||||||
|
stws,mb %r20,-4(0,%r26) ; res_ptr -= 4, then store the previous result limb
|
||||||
|
addib,= -1,%r24,L$0003 ; size--, leave when the limb just loaded was the last
|
||||||
|
vshd %r29,%r22,%r20 ; (delay slot) result limb from the pair %r29:%r22
|
||||||
|
ldws,mb -4(0,%r25),%r29
|
||||||
|
stws,mb %r20,-4(0,%r26)
|
||||||
|
addib,<> -1,%r24,L$loop ; size--, loop while non-zero
|
||||||
|
vshd %r22,%r29,%r20 ; (delay slot) result limb from the pair %r22:%r29
|
||||||
|
|
||||||
|
L$0002 stws,mb %r20,-4(0,%r26) ; exit with the lowest source limb in %r29
|
||||||
|
vshd %r29,%r0,%r20 ; lowest result limb: zeros shift in from below
|
||||||
|
bv 0(%r2) ; return through %r2
|
||||||
|
stw %r20,-4(0,%r26) ; (delay slot) store the lowest result limb
|
||||||
|
L$0003 stws,mb %r20,-4(0,%r26) ; exit with the lowest source limb in %r22
|
||||||
|
L$0004 vshd %r22,%r0,%r20 ; lowest result limb: zeros shift in from below
|
||||||
|
bv 0(%r2) ; return through %r2
|
||||||
|
stw %r20,-4(0,%r26) ; (delay slot) store the lowest result limb
|
||||||
|
|
||||||
|
.exit
|
||||||
|
.procend
|
62
sysdeps/hppa/rshift.s
Normal file
62
sysdeps/hppa/rshift.s
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
; HP-PA __mpn_rshift -- Shift a limb vector right by cnt bits.
|
||||||
|
|
||||||
|
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; res_ptr gr26
|
||||||
|
; s_ptr gr25
|
||||||
|
; size gr24
|
||||||
|
; cnt gr23
|
||||||
|
|
||||||
|
.code
|
||||||
|
.export __mpn_rshift
|
||||||
|
__mpn_rshift ; shift size limbs at s_ptr right by cnt bits into res_ptr; the bits shifted out are left in %r28
|
||||||
|
.proc
|
||||||
|
||||||
|
.callinfo frame=64,no_calls
|
||||||
|
.entry
|
||||||
|
|
||||||
|
ldws,ma 4(0,%r25),%r22 ; load the least significant limb, then s_ptr += 4
|
||||||
|
mtsar %r23 ; shift amount register = cnt, used by every vshd below
|
||||||
|
addib,= -1,%r24,L$0004 ; size--, single-limb case
|
||||||
|
vshd %r22,%r0,%r28 ; compute carry out limb
|
||||||
|
ldws,ma 4(0,%r25),%r29 ; next higher limb
|
||||||
|
addib,= -1,%r24,L$0002 ; size--, two-limb case
|
||||||
|
vshd %r29,%r22,%r20 ; (delay slot) result limb from the pair %r29:%r22
|
||||||
|
|
||||||
|
L$loop ldws,ma 4(0,%r25),%r22 ; two limbs per iteration; %r22 and %r29 alternate
|
||||||
|
stws,ma %r20,4(0,%r26) ; store the previous result limb, then res_ptr += 4
|
||||||
|
addib,= -1,%r24,L$0003 ; size--, leave when the limb just loaded was the last
|
||||||
|
vshd %r22,%r29,%r20 ; (delay slot) result limb from the pair %r22:%r29
|
||||||
|
ldws,ma 4(0,%r25),%r29
|
||||||
|
stws,ma %r20,4(0,%r26)
|
||||||
|
addib,<> -1,%r24,L$loop ; size--, loop while non-zero
|
||||||
|
vshd %r29,%r22,%r20 ; (delay slot) result limb from the pair %r29:%r22
|
||||||
|
|
||||||
|
L$0002 stws,ma %r20,4(0,%r26) ; exit with the highest source limb in %r29
|
||||||
|
vshd %r0,%r29,%r20 ; highest result limb: zeros shift in from above
|
||||||
|
bv 0(%r2) ; return through %r2
|
||||||
|
stw %r20,0(0,%r26) ; (delay slot) store the highest result limb
|
||||||
|
L$0003 stws,ma %r20,4(0,%r26) ; exit with the highest source limb in %r22
|
||||||
|
L$0004 vshd %r0,%r22,%r20 ; highest result limb: zeros shift in from above
|
||||||
|
bv 0(%r2) ; return through %r2
|
||||||
|
stw %r20,0(0,%r26) ; (delay slot) store the highest result limb
|
||||||
|
|
||||||
|
.exit
|
||||||
|
.procend
|
58
sysdeps/hppa/sub_n.s
Normal file
58
sysdeps/hppa/sub_n.s
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
; HP-PA __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||||
|
; store difference in a third limb vector.
|
||||||
|
|
||||||
|
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; res_ptr gr26
|
||||||
|
; s1_ptr gr25
|
||||||
|
; s2_ptr gr24
|
||||||
|
; size gr23
|
||||||
|
|
||||||
|
; One might want to unroll this as for other processors, but it turns
|
||||||
|
; out that the data cache contention after a store makes such
|
||||||
|
; unrolling useless. We can't come under 5 cycles/limb anyway.
|
||||||
|
|
||||||
|
.code
|
||||||
|
.export __mpn_sub_n
|
||||||
|
__mpn_sub_n ; store s1 - s2 (size limbs each) via res_ptr; borrow out is left in %r28
|
||||||
|
.proc
|
||||||
|
||||||
|
.callinfo frame=0,no_calls
|
||||||
|
.entry
|
||||||
|
|
||||||
|
ldws,ma 4(0,%r25),%r20 ; first s1 limb, then s1_ptr += 4
|
||||||
|
ldws,ma 4(0,%r24),%r19 ; first s2 limb, then s2_ptr += 4
|
||||||
|
|
||||||
|
addib,= -1,%r23,L$end ; check for (SIZE == 1)
|
||||||
|
sub %r20,%r19,%r28 ; subtract first limbs ignoring cy
|
||||||
|
|
||||||
|
L$loop ldws,ma 4(0,%r25),%r20 ; next s1 limb
|
||||||
|
ldws,ma 4(0,%r24),%r19 ; next s2 limb
|
||||||
|
stws,ma %r28,4(0,%r26) ; store the previous difference limb, then res_ptr += 4
|
||||||
|
addib,<> -1,%r23,L$loop ; size--, loop while non-zero
|
||||||
|
subb %r20,%r19,%r28 ; (delay slot) subtract limbs with the borrow from the previous step
|
||||||
|
|
||||||
|
L$end stws %r28,0(0,%r26) ; store the last difference limb
|
||||||
|
addc %r0,%r0,%r28 ; %r28 = the carry bit left by the last sub/subb
|
||||||
|
bv 0(%r2) ; return through %r2
|
||||||
|
subi 1,%r28,%r28 ; (delay slot) %r28 = 1 - carry, i.e. 1 when a borrow occurred
|
||||||
|
|
||||||
|
.exit
|
||||||
|
.procend
|
285
sysdeps/hppa/udiv_qrnnd.s
Normal file
285
sysdeps/hppa/udiv_qrnnd.s
Normal file
@ -0,0 +1,285 @@
|
|||||||
|
; HP-PA __udiv_qrnnd division support, used from longlong.h.
|
||||||
|
; This version runs fast on pre-PA7000 CPUs.
|
||||||
|
|
||||||
|
; Copyright (C) 1993, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; rem_ptr gr26
|
||||||
|
; n1 gr25
|
||||||
|
; n0 gr24
|
||||||
|
; d gr23
|
||||||
|
|
||||||
|
; The code size is a bit excessive. We could merge the last two ds;addc
|
||||||
|
; sequences by simply moving the "bb,< Odd" instruction down. The only
|
||||||
|
; trouble is the FFFFFFFF code that would need some hacking.
|
||||||
|
|
||||||
|
.code
|
||||||
|
.export __udiv_qrnnd
|
||||||
|
__udiv_qrnnd
|
||||||
|
.proc
|
||||||
|
.callinfo frame=0,no_calls
|
||||||
|
.entry
|
||||||
|
|
||||||
|
comb,< %r23,0,L$largedivisor
|
||||||
|
sub %r0,%r23,%r1 ; clear cy as side-effect
|
||||||
|
ds %r0,%r1,%r0
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
addc %r24,%r24,%r28
|
||||||
|
ds %r25,%r23,%r25
|
||||||
|
comclr,>= %r25,%r0,%r0
|
||||||
|
addl %r25,%r23,%r25
|
||||||
|
stws %r25,0(0,%r26)
|
||||||
|
bv 0(%r2)
|
||||||
|
addc %r28,%r28,%r28
|
||||||
|
|
||||||
|
L$largedivisor
|
||||||
|
extru %r24,31,1,%r19 ; r19 = n0 & 1
|
||||||
|
bb,< %r23,31,L$odd
|
||||||
|
extru %r23,30,31,%r22 ; r22 = d >> 1
|
||||||
|
shd %r25,%r24,1,%r24 ; r24 = new n0
|
||||||
|
extru %r25,30,31,%r25 ; r25 = new n1
|
||||||
|
sub %r0,%r22,%r21
|
||||||
|
ds %r0,%r21,%r0
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
comclr,>= %r25,%r0,%r0
|
||||||
|
addl %r25,%r22,%r25
|
||||||
|
sh1addl %r25,%r19,%r25
|
||||||
|
stws %r25,0(0,%r26)
|
||||||
|
bv 0(%r2)
|
||||||
|
addc %r24,%r24,%r28
|
||||||
|
|
||||||
|
L$odd addib,sv,n 1,%r22,L$FF.. ; r22 = (d / 2 + 1)
|
||||||
|
shd %r25,%r24,1,%r24 ; r24 = new n0
|
||||||
|
extru %r25,30,31,%r25 ; r25 = new n1
|
||||||
|
sub %r0,%r22,%r21
|
||||||
|
ds %r0,%r21,%r0
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r24
|
||||||
|
ds %r25,%r22,%r25
|
||||||
|
addc %r24,%r24,%r28
|
||||||
|
comclr,>= %r25,%r0,%r0
|
||||||
|
addl %r25,%r22,%r25
|
||||||
|
sh1addl %r25,%r19,%r25
|
||||||
|
; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
|
||||||
|
add,nuv %r28,%r25,%r25
|
||||||
|
addl %r25,%r1,%r25
|
||||||
|
addc %r0,%r28,%r28
|
||||||
|
sub,<< %r25,%r23,%r0
|
||||||
|
addl %r25,%r1,%r25
|
||||||
|
stws %r25,0(0,%r26)
|
||||||
|
bv 0(%r2)
|
||||||
|
addc %r0,%r28,%r28
|
||||||
|
|
||||||
|
; This is just a special case of the code above.
|
||||||
|
; We come here when d == 0xFFFFFFFF
|
||||||
|
L$FF.. add,uv %r25,%r24,%r24
|
||||||
|
sub,<< %r24,%r23,%r0
|
||||||
|
ldo 1(%r24),%r24
|
||||||
|
stws %r24,0(0,%r26)
|
||||||
|
bv 0(%r2)
|
||||||
|
addc %r0,%r25,%r28
|
||||||
|
|
||||||
|
.exit
|
||||||
|
.procend
|
21
sysdeps/i960/add_n.s
Normal file
21
sysdeps/i960/add_n.s
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
.text
|
||||||
|
.align 4
|
||||||
|
.globl ___mpn_add_n
|
||||||
|
___mpn_add_n:
|
||||||
|
mov 0,g6 # clear carry-save register
|
||||||
|
cmpo 1,0 # clear cy
|
||||||
|
|
||||||
|
Loop: subo 1,g3,g3 # update loop counter
|
||||||
|
ld (g1),g5 # load from s1_ptr
|
||||||
|
addo 4,g1,g1 # s1_ptr++
|
||||||
|
ld (g2),g4 # load from s2_ptr
|
||||||
|
addo 4,g2,g2 # s2_ptr++
|
||||||
|
cmpo g6,1 # restore cy from g6, relies on cy being 0
|
||||||
|
addc g4,g5,g4 # main add
|
||||||
|
subc 0,0,g6 # save cy in g6
|
||||||
|
st g4,(g0) # store result to res_ptr
|
||||||
|
addo 4,g0,g0 # res_ptr++
|
||||||
|
cmpobne 0,g3,Loop # when branch is taken, clears C bit
|
||||||
|
|
||||||
|
mov g6,g0
|
||||||
|
ret
|
26
sysdeps/i960/addmul_1.s
Normal file
26
sysdeps/i960/addmul_1.s
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# i960 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
|
||||||
|
# add the product to a second limb vector.
|
||||||
|
#
|
||||||
|
# Register use, as established by the code below:
|
||||||
|
# g0 res_ptr on entry; running carry limb afterwards (value left in g0 at ret)
|
||||||
|
# g1 s1_ptr
|
||||||
|
# g2 size (negated and used as an index that counts up to zero)
|
||||||
|
# g3 s2_limb
|
||||||
|
#
|
||||||
|
# NOTE: this file previously exported ___mpn_mul_1, colliding with mul_1.s
|
||||||
|
# and leaving ___mpn_addmul_1 undefined.
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 4
|
||||||
|
.globl ___mpn_addmul_1
|
||||||
|
___mpn_addmul_1:
|
||||||
|
subo g2,0,g2 # g2 = -size
|
||||||
|
shlo 2,g2,g4 # g4 = -size * 4 (negative byte length)
|
||||||
|
subo g4,g1,g1 # point s1_ptr past the end of its vector
|
||||||
|
subo g4,g0,g13 # g13 = res_ptr past the end of its vector
|
||||||
|
mov 0,g0 # carry limb = 0
|
||||||
|
|
||||||
|
cmpo 1,0 # clear C bit on AC.cc
|
||||||
|
|
||||||
|
Loop: ld (g1)[g2*4],g5 # load s1 limb
|
||||||
|
emul g3,g5,g6 # g7:g6 = s2_limb * s1 limb
|
||||||
|
ld (g13)[g2*4],g5 # load res limb to accumulate into
|
||||||
|
|
||||||
|
addc g0,g6,g6 # add old carry limb; relies on the C bit being clear
|
||||||
|
addc 0,g7,g7 # propagate carry into the high word
|
||||||
|
addc g5,g6,g6 # add res limb; relies on the C bit being clear
|
||||||
|
st g6,(g13)[g2*4] # store updated res limb
|
||||||
|
addc 0,g7,g0 # new carry limb = high word + carry
|
||||||
|
|
||||||
|
addo g2,1,g2 # step the (negative) index toward zero
|
||||||
|
cmpobne 0,g2,Loop # when branch is taken, clears C bit
|
||||||
|
|
||||||
|
ret
|
23
sysdeps/i960/mul_1.s
Normal file
23
sysdeps/i960/mul_1.s
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
.text
|
||||||
|
.align 4
|
||||||
|
.globl ___mpn_mul_1
|
||||||
|
___mpn_mul_1:
|
||||||
|
subo g2,0,g2
|
||||||
|
shlo 2,g2,g4
|
||||||
|
subo g4,g1,g1
|
||||||
|
subo g4,g0,g13
|
||||||
|
mov 0,g0
|
||||||
|
|
||||||
|
cmpo 1,0 # clear C bit on AC.cc
|
||||||
|
|
||||||
|
Loop: ld (g1)[g2*4],g5
|
||||||
|
emul g3,g5,g6
|
||||||
|
|
||||||
|
addc g0,g6,g6 # relies on the C bit being clear
|
||||||
|
st g6,(g13)[g2*4]
|
||||||
|
addc 0,g7,g0
|
||||||
|
|
||||||
|
addo g2,1,g2
|
||||||
|
cmpobne 0,g2,Loop # when branch is taken, clears C bit
|
||||||
|
|
||||||
|
ret
|
21
sysdeps/i960/sub_n.s
Normal file
21
sysdeps/i960/sub_n.s
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
.text
|
||||||
|
.align 4
|
||||||
|
.globl ___mpn_sub_n
|
||||||
|
___mpn_sub_n:
|
||||||
|
mov 1,g6 # set carry-save register
|
||||||
|
cmpo 1,0 # clear cy
|
||||||
|
|
||||||
|
Loop: subo 1,g3,g3 # update loop counter
|
||||||
|
ld (g1),g5 # load from s1_ptr
|
||||||
|
addo 4,g1,g1 # s1_ptr++
|
||||||
|
ld (g2),g4 # load from s2_ptr
|
||||||
|
addo 4,g2,g2 # s2_ptr++
|
||||||
|
cmpo g6,1 # restore cy from g6, relies on cy being 0
|
||||||
|
subc g4,g5,g4 # main subtract
|
||||||
|
subc 0,0,g6 # save cy in g6
|
||||||
|
st g4,(g0) # store result to res_ptr
|
||||||
|
addo 4,g0,g0 # res_ptr++
|
||||||
|
cmpobne 0,g3,Loop # when branch is taken, cy will be 0
|
||||||
|
|
||||||
|
mov g6,g0
|
||||||
|
ret
|
103
sysdeps/m88k/m88100/add_n.s
Normal file
103
sysdeps/m88k/m88100/add_n.s
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
; mc88100 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||||
|
; sum in a third limb vector.
|
||||||
|
|
||||||
|
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; res_ptr r2
|
||||||
|
; s1_ptr r3
|
||||||
|
; s2_ptr r4
|
||||||
|
; size r5
|
||||||
|
|
||||||
|
; This code has been optimized to run one instruction per clock, avoiding
|
||||||
|
; load stalls and writeback contention. As a result, the instruction
|
||||||
|
; order is not always natural.
|
||||||
|
|
||||||
|
; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
|
||||||
|
; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
|
||||||
|
|
||||||
|
text
|
||||||
|
align 16
|
||||||
|
global ___mpn_add_n
|
||||||
|
___mpn_add_n:
|
||||||
|
ld r6,r3,0 ; read first limb from s1_ptr
|
||||||
|
extu r10,r5,3
|
||||||
|
ld r7,r4,0 ; read first limb from s2_ptr
|
||||||
|
|
||||||
|
subu.co r5,r0,r5 ; (clear carry as side effect)
|
||||||
|
mak r5,r5,3<4>
|
||||||
|
bcnd eq0,r5,Lzero
|
||||||
|
|
||||||
|
or r12,r0,lo16(Lbase)
|
||||||
|
or.u r12,r12,hi16(Lbase)
|
||||||
|
addu r12,r12,r5 ; r12 is address for entering in loop
|
||||||
|
|
||||||
|
extu r5,r5,2 ; divide by 4
|
||||||
|
subu r2,r2,r5 ; adjust res_ptr
|
||||||
|
subu r3,r3,r5 ; adjust s1_ptr
|
||||||
|
subu r4,r4,r5 ; adjust s2_ptr
|
||||||
|
|
||||||
|
or r8,r6,r0
|
||||||
|
|
||||||
|
jmp.n r12
|
||||||
|
or r9,r7,r0
|
||||||
|
|
||||||
|
Loop: addu r3,r3,32
|
||||||
|
st r8,r2,28
|
||||||
|
addu r4,r4,32
|
||||||
|
ld r6,r3,0
|
||||||
|
addu r2,r2,32
|
||||||
|
ld r7,r4,0
|
||||||
|
Lzero: subu r10,r10,1 ; add 0 + 8r limbs (adj loop cnt)
|
||||||
|
Lbase: ld r8,r3,4
|
||||||
|
addu.cio r6,r6,r7
|
||||||
|
ld r9,r4,4
|
||||||
|
st r6,r2,0
|
||||||
|
ld r6,r3,8 ; add 7 + 8r limbs
|
||||||
|
addu.cio r8,r8,r9
|
||||||
|
ld r7,r4,8
|
||||||
|
st r8,r2,4
|
||||||
|
ld r8,r3,12 ; add 6 + 8r limbs
|
||||||
|
addu.cio r6,r6,r7
|
||||||
|
ld r9,r4,12
|
||||||
|
st r6,r2,8
|
||||||
|
ld r6,r3,16 ; add 5 + 8r limbs
|
||||||
|
addu.cio r8,r8,r9
|
||||||
|
ld r7,r4,16
|
||||||
|
st r8,r2,12
|
||||||
|
ld r8,r3,20 ; add 4 + 8r limbs
|
||||||
|
addu.cio r6,r6,r7
|
||||||
|
ld r9,r4,20
|
||||||
|
st r6,r2,16
|
||||||
|
ld r6,r3,24 ; add 3 + 8r limbs
|
||||||
|
addu.cio r8,r8,r9
|
||||||
|
ld r7,r4,24
|
||||||
|
st r8,r2,20
|
||||||
|
ld r8,r3,28 ; add 2 + 8r limbs
|
||||||
|
addu.cio r6,r6,r7
|
||||||
|
ld r9,r4,28
|
||||||
|
st r6,r2,24
|
||||||
|
bcnd.n ne0,r10,Loop ; add 1 + 8r limbs
|
||||||
|
addu.cio r8,r8,r9
|
||||||
|
|
||||||
|
st r8,r2,28 ; store most significant limb
|
||||||
|
|
||||||
|
jmp.n r1
|
||||||
|
addu.ci r2,r0,r0 ; return carry-out from most sign. limb
|
128
sysdeps/m88k/m88100/mul_1.s
Normal file
128
sysdeps/m88k/m88100/mul_1.s
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
; mc88100 __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
||||||
|
; store the product in a second limb vector.
|
||||||
|
|
||||||
|
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; res_ptr r2
|
||||||
|
; s1_ptr r3
|
||||||
|
; size r4
|
||||||
|
; s2_limb r5
|
||||||
|
|
||||||
|
; Common overhead is about 11 cycles/invocation.
|
||||||
|
|
||||||
|
; The speed for S2_LIMB >= 0x10000 is approximately 21 cycles/limb. (The
|
||||||
|
; pipeline stalls 2 cycles due to WB contention.)
|
||||||
|
|
||||||
|
; The speed for S2_LIMB < 0x10000 is approximately 16 cycles/limb. (The
|
||||||
|
; pipeline stalls 2 cycles due to WB contention and 1 cycle due to latency.)
|
||||||
|
|
||||||
|
; To enhance speed:
|
||||||
|
; 1. Unroll main loop 4-8 times.
|
||||||
|
; 2. Schedule code to avoid WB contention. It might be tempting to move the
|
||||||
|
; ld instruction in the loops down to save 2 cycles (less WB contention),
|
||||||
|
; but that loses because the ultimate value will be read from outside
|
||||||
|
; the allocated space. But if we handle the ultimate multiplication in
|
||||||
|
; the tail, we can do this.
|
||||||
|
; 3. Make the multiplication with less instructions. I think the code for
|
||||||
|
; (S2_LIMB >= 0x10000) is not minimal.
|
||||||
|
; With these techniques the (S2_LIMB >= 0x10000) case would run in 17 or
|
||||||
|
; less cycles/limb; the (S2_LIMB < 0x10000) case would run in 11
|
||||||
|
; cycles/limb. (Assuming infinite unrolling.)
|
||||||
|
|
||||||
|
text
|
||||||
|
align 16
|
||||||
|
global ___mpn_mul_1
|
||||||
|
___mpn_mul_1:
|
||||||
|
|
||||||
|
; Make S1_PTR and RES_PTR point at the end of their blocks
|
||||||
|
; and negate SIZE.
|
||||||
|
lda r3,r3[r4]
|
||||||
|
lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval
|
||||||
|
subu r4,r0,r4
|
||||||
|
|
||||||
|
addu.co r2,r0,r0 ; r2 = cy = 0
|
||||||
|
ld r9,r3[r4]
|
||||||
|
mask r7,r5,0xffff ; r7 = lo(S2_LIMB)
|
||||||
|
extu r8,r5,16 ; r8 = hi(S2_LIMB)
|
||||||
|
bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0)
|
||||||
|
subu r6,r6,4
|
||||||
|
|
||||||
|
; General code for any value of S2_LIMB.
|
||||||
|
|
||||||
|
; Make a stack frame and save r25 and r26
|
||||||
|
subu r31,r31,16
|
||||||
|
st.d r25,r31,8
|
||||||
|
|
||||||
|
; Enter the loop in the middle
|
||||||
|
br.n L1
|
||||||
|
addu r4,r4,1
|
||||||
|
|
||||||
|
Loop:
|
||||||
|
ld r9,r3[r4]
|
||||||
|
st r26,r6[r4]
|
||||||
|
; bcnd ne0,r0,0 ; bubble
|
||||||
|
addu r4,r4,1
|
||||||
|
L1: mul r26,r9,r5 ; low word of product mul_1 WB ld
|
||||||
|
mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1
|
||||||
|
mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1
|
||||||
|
mul r10,r12,r8 ; r10 = prod_1a mul_3
|
||||||
|
extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1
|
||||||
|
mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1
|
||||||
|
mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2
|
||||||
|
extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3
|
||||||
|
addu r10,r10,r11 ; addu_1 WB extu_2
|
||||||
|
; bcnd ne0,r0,0 ; bubble WB addu_1
|
||||||
|
addu.co r10,r10,r12 ; WB mul_4
|
||||||
|
mask.u r10,r10,0xffff ; move the 16 most significant bits...
|
||||||
|
addu.ci r10,r10,r0 ; ...to the low half of the word...
|
||||||
|
rot r10,r10,16 ; ...and put carry in pos 16.
|
||||||
|
addu.co r26,r26,r2 ; add old carry limb
|
||||||
|
bcnd.n ne0,r4,Loop
|
||||||
|
addu.ci r2,r25,r10 ; compute new carry limb
|
||||||
|
|
||||||
|
st r26,r6[r4]
|
||||||
|
ld.d r25,r31,8
|
||||||
|
jmp.n r1
|
||||||
|
addu r31,r31,16
|
||||||
|
|
||||||
|
; Fast code for S2_LIMB < 0x10000
|
||||||
|
Lsmall:
|
||||||
|
; Enter the loop in the middle
|
||||||
|
br.n SL1
|
||||||
|
addu r4,r4,1
|
||||||
|
|
||||||
|
SLoop:
|
||||||
|
ld r9,r3[r4] ;
|
||||||
|
st r8,r6[r4] ;
|
||||||
|
addu r4,r4,1 ;
|
||||||
|
SL1: mul r8,r9,r5 ; low word of product
|
||||||
|
mask r12,r9,0xffff ; r12 = lo(s1_limb)
|
||||||
|
extu r13,r9,16 ; r13 = hi(s1_limb)
|
||||||
|
mul r11,r12,r7 ; r11 = prod_0
|
||||||
|
mul r12,r13,r7 ; r12 = prod_1b
|
||||||
|
addu.cio r8,r8,r2 ; add old carry limb
|
||||||
|
extu r10,r11,16 ; r10 = hi(prod_0)
|
||||||
|
addu r10,r10,r12 ;
|
||||||
|
bcnd.n ne0,r4,SLoop
|
||||||
|
extu r2,r10,16 ; r2 = new carry limb
|
||||||
|
|
||||||
|
jmp.n r1
|
||||||
|
st r8,r6[r4]
|
104
sysdeps/m88k/m88100/sub_n.s
Normal file
104
sysdeps/m88k/m88100/sub_n.s
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
; mc88100 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||||
|
; store difference in a third limb vector.
|
||||||
|
|
||||||
|
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; res_ptr r2
|
||||||
|
; s1_ptr r3
|
||||||
|
; s2_ptr r4
|
||||||
|
; size r5
|
||||||
|
|
||||||
|
; This code has been optimized to run one instruction per clock, avoiding
|
||||||
|
; load stalls and writeback contention. As a result, the instruction
|
||||||
|
; order is not always natural.
|
||||||
|
|
||||||
|
; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
|
||||||
|
; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
|
||||||
|
|
||||||
|
text
|
||||||
|
align 16
|
||||||
|
global ___mpn_sub_n
|
||||||
|
___mpn_sub_n:
|
||||||
|
ld r6,r3,0 ; read first limb from s1_ptr
|
||||||
|
extu r10,r5,3
|
||||||
|
ld r7,r4,0 ; read first limb from s2_ptr
|
||||||
|
|
||||||
|
subu.co r5,r0,r5 ; (clear carry as side effect)
|
||||||
|
mak r5,r5,3<4>
|
||||||
|
bcnd eq0,r5,Lzero
|
||||||
|
|
||||||
|
or r12,r0,lo16(Lbase)
|
||||||
|
or.u r12,r12,hi16(Lbase)
|
||||||
|
addu r12,r12,r5 ; r12 is address for entering in loop
|
||||||
|
|
||||||
|
extu r5,r5,2 ; divide by 4
|
||||||
|
subu r2,r2,r5 ; adjust res_ptr
|
||||||
|
subu r3,r3,r5 ; adjust s1_ptr
|
||||||
|
subu r4,r4,r5 ; adjust s2_ptr
|
||||||
|
|
||||||
|
or r8,r6,r0
|
||||||
|
|
||||||
|
jmp.n r12
|
||||||
|
or r9,r7,r0
|
||||||
|
|
||||||
|
Loop: addu r3,r3,32
|
||||||
|
st r8,r2,28
|
||||||
|
addu r4,r4,32
|
||||||
|
ld r6,r3,0
|
||||||
|
addu r2,r2,32
|
||||||
|
ld r7,r4,0
|
||||||
|
Lzero: subu r10,r10,1 ; subtract 0 + 8r limbs (adj loop cnt)
|
||||||
|
Lbase: ld r8,r3,4
|
||||||
|
subu.cio r6,r6,r7
|
||||||
|
ld r9,r4,4
|
||||||
|
st r6,r2,0
|
||||||
|
ld r6,r3,8 ; subtract 7 + 8r limbs
|
||||||
|
subu.cio r8,r8,r9
|
||||||
|
ld r7,r4,8
|
||||||
|
st r8,r2,4
|
||||||
|
ld r8,r3,12 ; subtract 6 + 8r limbs
|
||||||
|
subu.cio r6,r6,r7
|
||||||
|
ld r9,r4,12
|
||||||
|
st r6,r2,8
|
||||||
|
ld r6,r3,16 ; subtract 5 + 8r limbs
|
||||||
|
subu.cio r8,r8,r9
|
||||||
|
ld r7,r4,16
|
||||||
|
st r8,r2,12
|
||||||
|
ld r8,r3,20 ; subtract 4 + 8r limbs
|
||||||
|
subu.cio r6,r6,r7
|
||||||
|
ld r9,r4,20
|
||||||
|
st r6,r2,16
|
||||||
|
ld r6,r3,24 ; subtract 3 + 8r limbs
|
||||||
|
subu.cio r8,r8,r9
|
||||||
|
ld r7,r4,24
|
||||||
|
st r8,r2,20
|
||||||
|
ld r8,r3,28 ; subtract 2 + 8r limbs
|
||||||
|
subu.cio r6,r6,r7
|
||||||
|
ld r9,r4,28
|
||||||
|
st r6,r2,24
|
||||||
|
bcnd.n ne0,r10,Loop ; subtract 1 + 8r limbs
|
||||||
|
subu.cio r8,r8,r9
|
||||||
|
|
||||||
|
st r8,r2,28 ; store most significant limb
|
||||||
|
|
||||||
|
addu.ci r2,r0,r0 ; return carry-out from most sign. limb
|
||||||
|
jmp.n r1
|
||||||
|
xor r2,r2,1
|
84
sysdeps/m88k/m88110/mul_1.s
Normal file
84
sysdeps/m88k/m88110/mul_1.s
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
; mc88110 __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
||||||
|
; store the product in a second limb vector.
|
||||||
|
|
||||||
|
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; res_ptr r2
|
||||||
|
; s1_ptr r3
|
||||||
|
; size r4
|
||||||
|
; s2_limb r5
|
||||||
|
|
||||||
|
text
|
||||||
|
align 16
|
||||||
|
global ___mpn_mul_1
|
||||||
|
___mpn_mul_1:
|
||||||
|
; Make S1_PTR and RES_PTR point at the end of their blocks
|
||||||
|
; and negate SIZE.
|
||||||
|
lda r3,r3[r4]
|
||||||
|
lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval
|
||||||
|
subu r4,r0,r4
|
||||||
|
|
||||||
|
addu.co r2,r0,r0 ; r2 = cy = 0
|
||||||
|
|
||||||
|
ld r6,r3[r4]
|
||||||
|
addu r4,r4,1
|
||||||
|
mulu.d r10,r6,r5
|
||||||
|
bcnd.n eq0,r4,Lend
|
||||||
|
subu r8,r8,8
|
||||||
|
|
||||||
|
Loop: ld r6,r3[r4]
|
||||||
|
addu.cio r9,r11,r2
|
||||||
|
or r2,r10,r0 ; could be avoided if unrolled
|
||||||
|
addu r4,r4,1
|
||||||
|
mulu.d r10,r6,r5
|
||||||
|
bcnd.n ne0,r4,Loop
|
||||||
|
st r9,r8[r4]
|
||||||
|
|
||||||
|
Lend: addu.cio r9,r11,r2
|
||||||
|
st r9,r8,4
|
||||||
|
jmp.n r1
|
||||||
|
addu.ci r2,r10,r0
|
||||||
|
|
||||||
|
; This is the Right Way to do this on '110. 4 cycles / 64-bit limb.
|
||||||
|
; ld.d r10,
|
||||||
|
; mulu.d
|
||||||
|
; addu.cio
|
||||||
|
; addu.cio
|
||||||
|
; st.d
|
||||||
|
; mulu.d ,r11,r5
|
||||||
|
; ld.d r12,
|
||||||
|
; mulu.d ,r10,r5
|
||||||
|
; addu.cio
|
||||||
|
; addu.cio
|
||||||
|
; st.d
|
||||||
|
; mulu.d
|
||||||
|
; ld.d r10,
|
||||||
|
; mulu.d
|
||||||
|
; addu.cio
|
||||||
|
; addu.cio
|
||||||
|
; st.d
|
||||||
|
; mulu.d
|
||||||
|
; ld.d r10,
|
||||||
|
; mulu.d
|
||||||
|
; addu.cio
|
||||||
|
; addu.cio
|
||||||
|
; st.d
|
||||||
|
; mulu.d
|
119
sysdeps/mips/add_n.s
Normal file
119
sysdeps/mips/add_n.s
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
# MIPS2 __mpn_add_n -- Add two limb vectors of the same length > 0 and
|
||||||
|
# store sum in a third limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# s1_ptr $5
|
||||||
|
# s2_ptr $6
|
||||||
|
# size $7
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_add_n
|
||||||
|
.ent __mpn_add_n
|
||||||
|
__mpn_add_n:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
lw $10,0($5)
|
||||||
|
lw $11,0($6)
|
||||||
|
|
||||||
|
addiu $7,$7,-1
|
||||||
|
and $9,$7,4-1 # number of limbs in first loop
|
||||||
|
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
|
||||||
|
move $2,$0
|
||||||
|
|
||||||
|
subu $7,$7,$9
|
||||||
|
|
||||||
|
.Loop0: addiu $9,$9,-1
|
||||||
|
lw $12,4($5)
|
||||||
|
addu $11,$11,$2
|
||||||
|
lw $13,4($6)
|
||||||
|
sltu $8,$11,$2
|
||||||
|
addu $11,$10,$11
|
||||||
|
sltu $2,$11,$10
|
||||||
|
sw $11,0($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
addiu $5,$5,4
|
||||||
|
addiu $6,$6,4
|
||||||
|
move $10,$12
|
||||||
|
move $11,$13
|
||||||
|
bne $9,$0,.Loop0
|
||||||
|
addiu $4,$4,4
|
||||||
|
|
||||||
|
.L0: beq $7,$0,.Lend
|
||||||
|
nop
|
||||||
|
|
||||||
|
.Loop: addiu $7,$7,-4
|
||||||
|
|
||||||
|
lw $12,4($5)
|
||||||
|
addu $11,$11,$2
|
||||||
|
lw $13,4($6)
|
||||||
|
sltu $8,$11,$2
|
||||||
|
addu $11,$10,$11
|
||||||
|
sltu $2,$11,$10
|
||||||
|
sw $11,0($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
lw $10,8($5)
|
||||||
|
addu $13,$13,$2
|
||||||
|
lw $11,8($6)
|
||||||
|
sltu $8,$13,$2
|
||||||
|
addu $13,$12,$13
|
||||||
|
sltu $2,$13,$12
|
||||||
|
sw $13,4($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
lw $12,12($5)
|
||||||
|
addu $11,$11,$2
|
||||||
|
lw $13,12($6)
|
||||||
|
sltu $8,$11,$2
|
||||||
|
addu $11,$10,$11
|
||||||
|
sltu $2,$11,$10
|
||||||
|
sw $11,8($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
lw $10,16($5)
|
||||||
|
addu $13,$13,$2
|
||||||
|
lw $11,16($6)
|
||||||
|
sltu $8,$13,$2
|
||||||
|
addu $13,$12,$13
|
||||||
|
sltu $2,$13,$12
|
||||||
|
sw $13,12($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
addiu $5,$5,16
|
||||||
|
addiu $6,$6,16
|
||||||
|
|
||||||
|
bne $7,$0,.Loop
|
||||||
|
addiu $4,$4,16
|
||||||
|
|
||||||
|
.Lend: addu $11,$11,$2
|
||||||
|
sltu $8,$11,$2
|
||||||
|
addu $11,$10,$11
|
||||||
|
sltu $2,$11,$10
|
||||||
|
sw $11,0($4)
|
||||||
|
j $31
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
.end __mpn_add_n
|
96
sysdeps/mips/addmul_1.s
Normal file
96
sysdeps/mips/addmul_1.s
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
# MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and
|
||||||
|
# add the product to a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# s1_ptr $5
|
||||||
|
# size $6
|
||||||
|
# s2_limb $7
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 4
|
||||||
|
.globl __mpn_addmul_1
|
||||||
|
.ent __mpn_addmul_1
|
||||||
|
# Multiply-accumulate: for each of the `size` ($6) limbs at s1_ptr ($5), form
# the product with s2_limb ($7) using multu, add it together with the running
# carry limb in $2 to the limb at res_ptr ($4), and store the sum back.
# $2 holds the final carry limb when control leaves through `j $31`.
# The code is software-pipelined: every multu is issued one step ahead of the
# mflo/mfhi that consume it, hence the warm-up and cool-down phases around
# Loop.  Under `.set noreorder` the instruction after each branch or jump is
# its delay slot and executes on both paths.
__mpn_addmul_1:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
# warm up phase 0
|
||||||
|
lw $8,0($5) # $8 = first s1 limb
|
||||||
|
|
||||||
|
# warm up phase 1
|
||||||
|
addiu $5,$5,4 # advance s1_ptr to the second limb
|
||||||
|
multu $8,$7 # start the first product
|
||||||
|
|
||||||
|
addiu $6,$6,-1
|
||||||
|
beq $6,$0,$LC0 # size was 1: only cool down phase 0 is needed
|
||||||
|
move $2,$0 # zero cy2 (branch delay slot)
|
||||||
|
|
||||||
|
addiu $6,$6,-1
|
||||||
|
beq $6,$0,$LC1 # size was 2: skip the loop
|
||||||
|
lw $8,0($5) # load new s1 limb as early as possible
|
||||||
|
|
||||||
|
Loop: lw $10,0($4) # $10 = current res limb
|
||||||
|
mflo $3 # $3 = low limb of the pending product
|
||||||
|
mfhi $9 # $9 = high limb of the pending product
|
||||||
|
addiu $5,$5,4
|
||||||
|
addu $3,$3,$2 # add old carry limb to low product limb
|
||||||
|
multu $8,$7 # start the next product
|
||||||
|
lw $8,0($5) # load new s1 limb as early as possible
|
||||||
|
addiu $6,$6,-1 # decrement loop counter
|
||||||
|
sltu $2,$3,$2 # carry from previous addition -> $2
|
||||||
|
addu $3,$10,$3 # add the res limb
|
||||||
|
sltu $10,$3,$10 # carry from adding the res limb -> $10
|
||||||
|
addu $2,$2,$10 # sum of the two carries
|
||||||
|
sw $3,0($4) # store the result limb
|
||||||
|
addiu $4,$4,4
|
||||||
|
bne $6,$0,Loop # should be "bnel"
|
||||||
|
addu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
# cool down phase 1
|
||||||
|
$LC1: lw $10,0($4) # same step as the loop body, but without loading a further s1 limb
|
||||||
|
mflo $3
|
||||||
|
mfhi $9
|
||||||
|
addu $3,$3,$2
|
||||||
|
sltu $2,$3,$2
|
||||||
|
multu $8,$7 # start the last product
|
||||||
|
addu $3,$10,$3
|
||||||
|
sltu $10,$3,$10
|
||||||
|
addu $2,$2,$10
|
||||||
|
sw $3,0($4)
|
||||||
|
addiu $4,$4,4
|
||||||
|
addu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
# cool down phase 0
|
||||||
|
$LC0: lw $10,0($4) # last res limb; no further multiply is started
|
||||||
|
mflo $3
|
||||||
|
mfhi $9
|
||||||
|
addu $3,$3,$2
|
||||||
|
sltu $2,$3,$2
|
||||||
|
addu $3,$10,$3
|
||||||
|
sltu $10,$3,$10
|
||||||
|
addu $2,$2,$10
|
||||||
|
sw $3,0($4)
|
||||||
|
j $31
|
||||||
|
addu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
.end __mpn_addmul_1
|
94
sysdeps/mips/lshift.s
Normal file
94
sysdeps/mips/lshift.s
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
# MIPS2 __mpn_lshift -- Shift a limb vector left by cnt bits and store the result in a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# src_ptr $5
|
||||||
|
# size $6
|
||||||
|
# cnt $7
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_lshift
|
||||||
|
.ent __mpn_lshift
|
||||||
|
# Shift the `size` ($6) limbs at src_ptr ($5) left by cnt ($7) bits and store
# them at res_ptr ($4).  Both pointers are first moved to the end of their
# vectors and the limbs are processed from the highest address downwards.
# $2 receives the bits shifted out of the highest limb.
# One limb is always held preloaded in $10 (or $3); .Loop0 handles
# (size-1) mod 4 limbs, .Loop handles four limbs per iteration, and .Lend
# stores the lowest limb.  Under `.set noreorder` the instruction after each
# branch or jump is its delay slot and executes on both paths.
# NOTE(review): using -cnt as the complementary shift count only yields the
# right bits for 0 < cnt < 32; presumably callers guarantee that -- confirm.
__mpn_lshift:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
sll $2,$6,2 # $2 = size * 4, the byte length of the vectors
|
||||||
|
addu $5,$5,$2 # make r5 point at end of src
|
||||||
|
lw $10,-4($5) # load first limb
|
||||||
|
subu $13,$0,$7 # $13 = -cnt; as a shift count its low 5 bits equal 32-cnt
|
||||||
|
addu $4,$4,$2 # make r4 point at end of res
|
||||||
|
addiu $6,$6,-1 # $6 = size-1 limbs still to be loaded
|
||||||
|
and $9,$6,4-1 # number of limbs in first loop
|
||||||
|
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
|
||||||
|
srl $2,$10,$13 # compute function result
|
||||||
|
|
||||||
|
subu $6,$6,$9 # $6 = limbs left for the unrolled loop (a multiple of 4)
|
||||||
|
|
||||||
|
.Loop0: lw $3,-8($5) # $3 = next lower limb
|
||||||
|
addiu $4,$4,-4
|
||||||
|
addiu $5,$5,-4
|
||||||
|
addiu $9,$9,-1
|
||||||
|
sll $11,$10,$7 # current limb shifted left by cnt
|
||||||
|
srl $12,$3,$13 # bits of the next lower limb that move up into this one
|
||||||
|
move $10,$3 # next lower limb becomes the current limb
|
||||||
|
or $8,$11,$12
|
||||||
|
bne $9,$0,.Loop0
|
||||||
|
sw $8,0($4) # (delay slot) store the result limb
|
||||||
|
|
||||||
|
.L0: beq $6,$0,.Lend # nothing left for the unrolled loop
|
||||||
|
nop
|
||||||
|
|
||||||
|
.Loop: lw $3,-8($5) # four limbs per iteration; $10 and $3 alternate as current/next limb
|
||||||
|
addiu $4,$4,-16
|
||||||
|
addiu $6,$6,-4
|
||||||
|
sll $11,$10,$7
|
||||||
|
srl $12,$3,$13
|
||||||
|
|
||||||
|
lw $10,-12($5)
|
||||||
|
sll $14,$3,$7
|
||||||
|
or $8,$11,$12
|
||||||
|
sw $8,12($4)
|
||||||
|
srl $9,$10,$13
|
||||||
|
|
||||||
|
lw $3,-16($5)
|
||||||
|
sll $11,$10,$7
|
||||||
|
or $8,$14,$9
|
||||||
|
sw $8,8($4)
|
||||||
|
srl $12,$3,$13
|
||||||
|
|
||||||
|
lw $10,-20($5) # preload the current limb for the next iteration (or for .Lend)
|
||||||
|
sll $14,$3,$7
|
||||||
|
or $8,$11,$12
|
||||||
|
sw $8,4($4)
|
||||||
|
srl $9,$10,$13
|
||||||
|
|
||||||
|
addiu $5,$5,-16
|
||||||
|
or $8,$14,$9
|
||||||
|
bgtz $6,.Loop
|
||||||
|
sw $8,0($4) # (delay slot) store the fourth result limb
|
||||||
|
|
||||||
|
.Lend: sll $8,$10,$7 # lowest limb: only zeros are shifted in
|
||||||
|
j $31
|
||||||
|
sw $8,-4($4) # (delay slot) store the lowest result limb
|
||||||
|
.end __mpn_lshift
|
119
sysdeps/mips/mips3/add_n.s
Normal file
119
sysdeps/mips/mips3/add_n.s
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
# MIPS3 __mpn_add_n -- Add two limb vectors of the same length > 0 and
|
||||||
|
# store sum in a third limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# s1_ptr $5
|
||||||
|
# s2_ptr $6
|
||||||
|
# size $7
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_add_n
|
||||||
|
.ent __mpn_add_n
|
||||||
|
# Add the `size` ($7) 64-bit limbs at s2_ptr ($6) to those at s1_ptr ($5) and
# store the sums at res_ptr ($4).  $2 carries the carry between limbs and
# holds the carry out of the last limb at the final `j $31`.
# Each limb is added in two steps (s2 limb + carry-in, then + s1 limb); the
# carry out of each step is detected with sltu and the two are OR-ed (they
# cannot both be set for the same limb).
# One limb pair is always held preloaded in ($10,$11) or ($12,$13): .Loop0
# handles (size-1) mod 4 limbs, .Loop four limbs per iteration, and .Lend the
# last limb.  Under `.set noreorder` the instruction after each branch or
# jump is its delay slot and executes on both paths.
__mpn_add_n:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
ld $10,0($5) # $10 = first s1 limb
|
||||||
|
ld $11,0($6) # $11 = first s2 limb
|
||||||
|
|
||||||
|
daddiu $7,$7,-1 # $7 = size-1; the last limb pair is handled at .Lend
|
||||||
|
and $9,$7,4-1 # number of limbs in first loop
|
||||||
|
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
|
||||||
|
move $2,$0 # (delay slot) carry-in = 0
|
||||||
|
|
||||||
|
dsubu $7,$7,$9 # $7 = limbs left for the unrolled loop (a multiple of 4)
|
||||||
|
|
||||||
|
.Loop0: daddiu $9,$9,-1
|
||||||
|
ld $12,8($5) # preload next s1 limb
|
||||||
|
daddu $11,$11,$2 # s2 limb + carry-in
|
||||||
|
ld $13,8($6) # preload next s2 limb
|
||||||
|
sltu $8,$11,$2 # $8 = carry out of adding the carry-in
|
||||||
|
daddu $11,$10,$11 # + s1 limb
|
||||||
|
sltu $2,$11,$10 # $2 = carry out of adding the s1 limb
|
||||||
|
sd $11,0($4) # store the sum limb
|
||||||
|
or $2,$2,$8 # carry-out for the next limb
|
||||||
|
|
||||||
|
daddiu $5,$5,8
|
||||||
|
daddiu $6,$6,8
|
||||||
|
move $10,$12 # the preloaded pair becomes the current pair
|
||||||
|
move $11,$13
|
||||||
|
bne $9,$0,.Loop0
|
||||||
|
daddiu $4,$4,8 # (delay slot) advance res_ptr
|
||||||
|
|
||||||
|
.L0: beq $7,$0,.Lend # nothing left for the unrolled loop
|
||||||
|
nop
|
||||||
|
|
||||||
|
.Loop: daddiu $7,$7,-4 # four limbs per iteration
|
||||||
|
|
||||||
|
ld $12,8($5) # limb 0 of the group: add ($10,$11), preload ($12,$13)
|
||||||
|
daddu $11,$11,$2
|
||||||
|
ld $13,8($6)
|
||||||
|
sltu $8,$11,$2
|
||||||
|
daddu $11,$10,$11
|
||||||
|
sltu $2,$11,$10
|
||||||
|
sd $11,0($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
ld $10,16($5) # limb 1 of the group: add ($12,$13), preload ($10,$11)
|
||||||
|
daddu $13,$13,$2
|
||||||
|
ld $11,16($6)
|
||||||
|
sltu $8,$13,$2
|
||||||
|
daddu $13,$12,$13
|
||||||
|
sltu $2,$13,$12
|
||||||
|
sd $13,8($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
ld $12,24($5) # limb 2 of the group: add ($10,$11), preload ($12,$13)
|
||||||
|
daddu $11,$11,$2
|
||||||
|
ld $13,24($6)
|
||||||
|
sltu $8,$11,$2
|
||||||
|
daddu $11,$10,$11
|
||||||
|
sltu $2,$11,$10
|
||||||
|
sd $11,16($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
ld $10,32($5) # limb 3 of the group: add ($12,$13), preload ($10,$11)
|
||||||
|
daddu $13,$13,$2
|
||||||
|
ld $11,32($6)
|
||||||
|
sltu $8,$13,$2
|
||||||
|
daddu $13,$12,$13
|
||||||
|
sltu $2,$13,$12
|
||||||
|
sd $13,24($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
daddiu $5,$5,32
|
||||||
|
daddiu $6,$6,32
|
||||||
|
|
||||||
|
bne $7,$0,.Loop
|
||||||
|
daddiu $4,$4,32 # (delay slot) advance res_ptr
|
||||||
|
|
||||||
|
.Lend: daddu $11,$11,$2 # last limb pair, already loaded in ($10,$11)
|
||||||
|
sltu $8,$11,$2
|
||||||
|
daddu $11,$10,$11
|
||||||
|
sltu $2,$11,$10
|
||||||
|
sd $11,0($4)
|
||||||
|
j $31
|
||||||
|
or $2,$2,$8 # (delay slot) $2 = carry out of the last limb
|
||||||
|
|
||||||
|
.end __mpn_add_n
|
96
sysdeps/mips/mips3/addmul_1.s
Normal file
96
sysdeps/mips/mips3/addmul_1.s
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
# MIPS3 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
|
||||||
|
# add the product to a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# s1_ptr $5
|
||||||
|
# size $6
|
||||||
|
# s2_limb $7
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 4
|
||||||
|
.globl __mpn_addmul_1
|
||||||
|
.ent __mpn_addmul_1
|
||||||
|
# Multiply-accumulate on 64-bit limbs: for each of the `size` ($6) limbs at
# s1_ptr ($5), form the product with s2_limb ($7) using dmultu, add it
# together with the running carry limb in $2 to the limb at res_ptr ($4), and
# store the sum back.  $2 holds the final carry limb when control leaves
# through `j $31`.
# The code is software-pipelined: every dmultu is issued one step ahead of
# the mflo/mfhi that consume it, hence the warm-up and cool-down phases
# around Loop.  Under `.set noreorder` the instruction after each branch or
# jump is its delay slot and executes on both paths.
__mpn_addmul_1:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
# warm up phase 0
|
||||||
|
ld $8,0($5) # $8 = first s1 limb
|
||||||
|
|
||||||
|
# warm up phase 1
|
||||||
|
daddiu $5,$5,8 # advance s1_ptr to the second limb
|
||||||
|
dmultu $8,$7 # start the first product
|
||||||
|
|
||||||
|
daddiu $6,$6,-1
|
||||||
|
beq $6,$0,$LC0 # size was 1: only cool down phase 0 is needed
|
||||||
|
move $2,$0 # zero cy2 (branch delay slot)
|
||||||
|
|
||||||
|
daddiu $6,$6,-1
|
||||||
|
beq $6,$0,$LC1 # size was 2: skip the loop
|
||||||
|
ld $8,0($5) # load new s1 limb as early as possible
|
||||||
|
|
||||||
|
Loop: ld $10,0($4) # $10 = current res limb
|
||||||
|
mflo $3 # $3 = low limb of the pending product
|
||||||
|
mfhi $9 # $9 = high limb of the pending product
|
||||||
|
daddiu $5,$5,8
|
||||||
|
daddu $3,$3,$2 # add old carry limb to low product limb
|
||||||
|
dmultu $8,$7 # start the next product
|
||||||
|
ld $8,0($5) # load new s1 limb as early as possible
|
||||||
|
daddiu $6,$6,-1 # decrement loop counter
|
||||||
|
sltu $2,$3,$2 # carry from previous addition -> $2
|
||||||
|
daddu $3,$10,$3 # add the res limb
|
||||||
|
sltu $10,$3,$10 # carry from adding the res limb -> $10
|
||||||
|
daddu $2,$2,$10 # sum of the two carries
|
||||||
|
sd $3,0($4) # store the result limb
|
||||||
|
daddiu $4,$4,8
|
||||||
|
bne $6,$0,Loop # should be "bnel"
|
||||||
|
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
# cool down phase 1
|
||||||
|
$LC1: ld $10,0($4) # same step as the loop body, but without loading a further s1 limb
|
||||||
|
mflo $3
|
||||||
|
mfhi $9
|
||||||
|
daddu $3,$3,$2
|
||||||
|
sltu $2,$3,$2
|
||||||
|
dmultu $8,$7 # start the last product
|
||||||
|
daddu $3,$10,$3
|
||||||
|
sltu $10,$3,$10
|
||||||
|
daddu $2,$2,$10
|
||||||
|
sd $3,0($4)
|
||||||
|
daddiu $4,$4,8
|
||||||
|
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
# cool down phase 0
|
||||||
|
$LC0: ld $10,0($4) # last res limb; no further multiply is started
|
||||||
|
mflo $3
|
||||||
|
mfhi $9
|
||||||
|
daddu $3,$3,$2
|
||||||
|
sltu $2,$3,$2
|
||||||
|
daddu $3,$10,$3
|
||||||
|
sltu $10,$3,$10
|
||||||
|
daddu $2,$2,$10
|
||||||
|
sd $3,0($4)
|
||||||
|
j $31
|
||||||
|
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
.end __mpn_addmul_1
|
94
sysdeps/mips/mips3/lshift.s
Normal file
94
sysdeps/mips/mips3/lshift.s
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
# MIPS3 __mpn_lshift -- Shift a limb vector left by cnt bits and store the result in a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# src_ptr $5
|
||||||
|
# size $6
|
||||||
|
# cnt $7
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_lshift
|
||||||
|
.ent __mpn_lshift
|
||||||
|
# Shift the `size` ($6) 64-bit limbs at src_ptr ($5) left by cnt ($7) bits and
# store them at res_ptr ($4).  Both pointers are first moved to the end of
# their vectors and the limbs are processed from the highest address
# downwards.  $2 receives the bits shifted out of the highest limb.
# One limb is always held preloaded in $10 (or $3); .Loop0 handles
# (size-1) mod 4 limbs, .Loop handles four limbs per iteration, and .Lend
# stores the lowest limb.  Under `.set noreorder` the instruction after each
# branch or jump is its delay slot and executes on both paths.
# NOTE(review): using -cnt as the complementary shift count only yields the
# right bits for 0 < cnt < 64; presumably callers guarantee that -- confirm.
__mpn_lshift:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
dsll $2,$6,3 # $2 = size * 8, the byte length of the vectors
|
||||||
|
daddu $5,$5,$2 # make r5 point at end of src
|
||||||
|
ld $10,-8($5) # load first limb
|
||||||
|
dsubu $13,$0,$7 # $13 = -cnt; as a shift count its low 6 bits equal 64-cnt
|
||||||
|
daddu $4,$4,$2 # make r4 point at end of res
|
||||||
|
daddiu $6,$6,-1 # $6 = size-1 limbs still to be loaded
|
||||||
|
and $9,$6,4-1 # number of limbs in first loop
|
||||||
|
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
|
||||||
|
dsrl $2,$10,$13 # compute function result
|
||||||
|
|
||||||
|
dsubu $6,$6,$9 # $6 = limbs left for the unrolled loop (a multiple of 4)
|
||||||
|
|
||||||
|
.Loop0: ld $3,-16($5) # $3 = next lower limb
|
||||||
|
daddiu $4,$4,-8
|
||||||
|
daddiu $5,$5,-8
|
||||||
|
daddiu $9,$9,-1
|
||||||
|
dsll $11,$10,$7 # current limb shifted left by cnt
|
||||||
|
dsrl $12,$3,$13 # bits of the next lower limb that move up into this one
|
||||||
|
move $10,$3 # next lower limb becomes the current limb
|
||||||
|
or $8,$11,$12
|
||||||
|
bne $9,$0,.Loop0
|
||||||
|
sd $8,0($4) # (delay slot) store the result limb
|
||||||
|
|
||||||
|
.L0: beq $6,$0,.Lend # nothing left for the unrolled loop
|
||||||
|
nop
|
||||||
|
|
||||||
|
.Loop: ld $3,-16($5) # four limbs per iteration; $10 and $3 alternate as current/next limb
|
||||||
|
daddiu $4,$4,-32
|
||||||
|
daddiu $6,$6,-4
|
||||||
|
dsll $11,$10,$7
|
||||||
|
dsrl $12,$3,$13
|
||||||
|
|
||||||
|
ld $10,-24($5)
|
||||||
|
dsll $14,$3,$7
|
||||||
|
or $8,$11,$12
|
||||||
|
sd $8,24($4)
|
||||||
|
dsrl $9,$10,$13
|
||||||
|
|
||||||
|
ld $3,-32($5)
|
||||||
|
dsll $11,$10,$7
|
||||||
|
or $8,$14,$9
|
||||||
|
sd $8,16($4)
|
||||||
|
dsrl $12,$3,$13
|
||||||
|
|
||||||
|
ld $10,-40($5) # preload the current limb for the next iteration (or for .Lend)
|
||||||
|
dsll $14,$3,$7
|
||||||
|
or $8,$11,$12
|
||||||
|
sd $8,8($4)
|
||||||
|
dsrl $9,$10,$13
|
||||||
|
|
||||||
|
daddiu $5,$5,-32
|
||||||
|
or $8,$14,$9
|
||||||
|
bgtz $6,.Loop
|
||||||
|
sd $8,0($4) # (delay slot) store the fourth result limb
|
||||||
|
|
||||||
|
.Lend: dsll $8,$10,$7 # lowest limb: only zeros are shifted in
|
||||||
|
j $31
|
||||||
|
sd $8,-8($4) # (delay slot) store the lowest result limb
|
||||||
|
.end __mpn_lshift
|
84
sysdeps/mips/mips3/mul_1.s
Normal file
84
sysdeps/mips/mips3/mul_1.s
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
# MIPS3 __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
||||||
|
# store the product in a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# s1_ptr $5
|
||||||
|
# size $6
|
||||||
|
# s2_limb $7
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 4
|
||||||
|
.globl __mpn_mul_1
|
||||||
|
.ent __mpn_mul_1
|
||||||
|
# Multiply on 64-bit limbs: for each of the `size` ($6) limbs at s1_ptr ($5),
# form the product with s2_limb ($7) using dmultu, add the running carry limb
# in $2 to its low limb, and store that at res_ptr ($4).  $2 holds the final
# carry limb when control leaves through `j $31`.
# The code is software-pipelined: every dmultu is issued one step ahead of
# the mflo/mfhi that consume it, hence the warm-up and cool-down phases
# around Loop.  Under `.set noreorder` the instruction after each branch or
# jump is its delay slot and executes on both paths.
__mpn_mul_1:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
# warm up phase 0
|
||||||
|
ld $8,0($5) # $8 = first s1 limb
|
||||||
|
|
||||||
|
# warm up phase 1
|
||||||
|
daddiu $5,$5,8 # advance s1_ptr to the second limb
|
||||||
|
dmultu $8,$7 # start the first product
|
||||||
|
|
||||||
|
daddiu $6,$6,-1
|
||||||
|
beq $6,$0,$LC0 # size was 1: only cool down phase 0 is needed
|
||||||
|
move $2,$0 # zero cy2 (branch delay slot)
|
||||||
|
|
||||||
|
daddiu $6,$6,-1
|
||||||
|
beq $6,$0,$LC1 # size was 2: skip the loop
|
||||||
|
ld $8,0($5) # load new s1 limb as early as possible
|
||||||
|
|
||||||
|
Loop: mflo $10 # $10 = low limb of the pending product
|
||||||
|
mfhi $9 # $9 = high limb of the pending product
|
||||||
|
daddiu $5,$5,8
|
||||||
|
daddu $10,$10,$2 # add old carry limb to low product limb
|
||||||
|
dmultu $8,$7 # start the next product
|
||||||
|
ld $8,0($5) # load new s1 limb as early as possible
|
||||||
|
daddiu $6,$6,-1 # decrement loop counter
|
||||||
|
sltu $2,$10,$2 # carry from previous addition -> $2
|
||||||
|
sd $10,0($4) # store the result limb
|
||||||
|
daddiu $4,$4,8
|
||||||
|
bne $6,$0,Loop # should be "bnel"
|
||||||
|
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
# cool down phase 1
|
||||||
|
$LC1: mflo $10 # same step as the loop body, but without loading a further s1 limb
|
||||||
|
mfhi $9
|
||||||
|
daddu $10,$10,$2
|
||||||
|
sltu $2,$10,$2
|
||||||
|
dmultu $8,$7 # start the last product
|
||||||
|
sd $10,0($4)
|
||||||
|
daddiu $4,$4,8
|
||||||
|
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
# cool down phase 0
|
||||||
|
$LC0: mflo $10 # last product; no further multiply is started
|
||||||
|
mfhi $9
|
||||||
|
daddu $10,$10,$2
|
||||||
|
sltu $2,$10,$2
|
||||||
|
sd $10,0($4)
|
||||||
|
j $31
|
||||||
|
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
.end __mpn_mul_1
|
91
sysdeps/mips/mips3/rshift.s
Normal file
91
sysdeps/mips/mips3/rshift.s
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
# MIPS3 __mpn_rshift -- Shift a limb vector right by cnt bits and store the result in a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# src_ptr $5
|
||||||
|
# size $6
|
||||||
|
# cnt $7
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_rshift
|
||||||
|
.ent __mpn_rshift
|
||||||
|
# Shift the `size` ($6) 64-bit limbs at src_ptr ($5) right by cnt ($7) bits
# and store them at res_ptr ($4), processing the limbs from the lowest
# address upwards.  $2 receives the bits shifted out of the lowest limb,
# moved to the high end of the register by the `dsll` with -cnt.
# One limb is always held preloaded in $10 (or $3); .Loop0 handles
# (size-1) mod 4 limbs, .Loop handles four limbs per iteration, and .Lend
# stores the highest limb.  Under `.set noreorder` the instruction after each
# branch or jump is its delay slot and executes on both paths.
# NOTE(review): using -cnt as the complementary shift count only yields the
# right bits for 0 < cnt < 64; presumably callers guarantee that -- confirm.
__mpn_rshift:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
ld $10,0($5) # load first limb
|
||||||
|
dsubu $13,$0,$7 # $13 = -cnt; as a shift count its low 6 bits equal 64-cnt
|
||||||
|
daddiu $6,$6,-1 # $6 = size-1 limbs still to be loaded
|
||||||
|
and $9,$6,4-1 # number of limbs in first loop
|
||||||
|
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
|
||||||
|
dsll $2,$10,$13 # compute function result
|
||||||
|
|
||||||
|
dsubu $6,$6,$9 # $6 = limbs left for the unrolled loop (a multiple of 4)
|
||||||
|
|
||||||
|
.Loop0: ld $3,8($5) # $3 = next higher limb
|
||||||
|
daddiu $4,$4,8
|
||||||
|
daddiu $5,$5,8
|
||||||
|
daddiu $9,$9,-1
|
||||||
|
dsrl $11,$10,$7 # current limb shifted right by cnt
|
||||||
|
dsll $12,$3,$13 # bits of the next higher limb that move down into this one
|
||||||
|
move $10,$3 # next higher limb becomes the current limb
|
||||||
|
or $8,$11,$12
|
||||||
|
bne $9,$0,.Loop0
|
||||||
|
sd $8,-8($4) # (delay slot) store the result limb ($4 was already advanced)
|
||||||
|
|
||||||
|
.L0: beq $6,$0,.Lend # nothing left for the unrolled loop
|
||||||
|
nop
|
||||||
|
|
||||||
|
.Loop: ld $3,8($5) # four limbs per iteration; $10 and $3 alternate as current/next limb
|
||||||
|
daddiu $4,$4,32
|
||||||
|
daddiu $6,$6,-4
|
||||||
|
dsrl $11,$10,$7
|
||||||
|
dsll $12,$3,$13
|
||||||
|
|
||||||
|
ld $10,16($5)
|
||||||
|
dsrl $14,$3,$7
|
||||||
|
or $8,$11,$12
|
||||||
|
sd $8,-32($4)
|
||||||
|
dsll $9,$10,$13
|
||||||
|
|
||||||
|
ld $3,24($5)
|
||||||
|
dsrl $11,$10,$7
|
||||||
|
or $8,$14,$9
|
||||||
|
sd $8,-24($4)
|
||||||
|
dsll $12,$3,$13
|
||||||
|
|
||||||
|
ld $10,32($5) # preload the current limb for the next iteration (or for .Lend)
|
||||||
|
dsrl $14,$3,$7
|
||||||
|
or $8,$11,$12
|
||||||
|
sd $8,-16($4)
|
||||||
|
dsll $9,$10,$13
|
||||||
|
|
||||||
|
daddiu $5,$5,32
|
||||||
|
or $8,$14,$9
|
||||||
|
bgtz $6,.Loop
|
||||||
|
sd $8,-8($4) # (delay slot) store the fourth result limb
|
||||||
|
|
||||||
|
.Lend: dsrl $8,$10,$7 # highest limb: only zeros are shifted in
|
||||||
|
j $31
|
||||||
|
sd $8,0($4) # (delay slot) store the highest result limb
|
||||||
|
.end __mpn_rshift
|
119
sysdeps/mips/mips3/sub_n.s
Normal file
119
sysdeps/mips/mips3/sub_n.s
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
# MIPS3 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||||
|
# store difference in a third limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# s1_ptr $5
|
||||||
|
# s2_ptr $6
|
||||||
|
# size $7
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_sub_n
|
||||||
|
.ent __mpn_sub_n
|
||||||
|
# Subtract the `size` ($7) 64-bit limbs at s2_ptr ($6) from those at s1_ptr
# ($5) and store the differences at res_ptr ($4).  $2 carries the borrow
# between limbs and holds the borrow out of the last limb at the final
# `j $31`.
# Each limb is handled in two steps: the borrow-in is first added to the s2
# limb (overflow of that addition -> $8), then the sum is subtracted from the
# s1 limb (borrow detected as s1 limb < difference -> $2); the two flags are
# OR-ed.
# One limb pair is always held preloaded in ($10,$11) or ($12,$13): .Loop0
# handles (size-1) mod 4 limbs, .Loop four limbs per iteration, and .Lend the
# last limb.  Under `.set noreorder` the instruction after each branch or
# jump is its delay slot and executes on both paths.
__mpn_sub_n:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
ld $10,0($5) # $10 = first s1 limb
|
||||||
|
ld $11,0($6) # $11 = first s2 limb
|
||||||
|
|
||||||
|
daddiu $7,$7,-1 # $7 = size-1; the last limb pair is handled at .Lend
|
||||||
|
and $9,$7,4-1 # number of limbs in first loop
|
||||||
|
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
|
||||||
|
move $2,$0 # (delay slot) borrow-in = 0
|
||||||
|
|
||||||
|
dsubu $7,$7,$9 # $7 = limbs left for the unrolled loop (a multiple of 4)
|
||||||
|
|
||||||
|
.Loop0: daddiu $9,$9,-1
|
||||||
|
ld $12,8($5) # preload next s1 limb
|
||||||
|
daddu $11,$11,$2 # s2 limb + borrow-in
|
||||||
|
ld $13,8($6) # preload next s2 limb
|
||||||
|
sltu $8,$11,$2 # $8 = overflow of adding the borrow-in
|
||||||
|
dsubu $11,$10,$11 # s1 limb - (s2 limb + borrow-in)
|
||||||
|
sltu $2,$10,$11 # $2 = borrow: s1 limb < difference
|
||||||
|
sd $11,0($4) # store the difference limb
|
||||||
|
or $2,$2,$8 # borrow-out for the next limb
|
||||||
|
|
||||||
|
daddiu $5,$5,8
|
||||||
|
daddiu $6,$6,8
|
||||||
|
move $10,$12 # the preloaded pair becomes the current pair
|
||||||
|
move $11,$13
|
||||||
|
bne $9,$0,.Loop0
|
||||||
|
daddiu $4,$4,8 # (delay slot) advance res_ptr
|
||||||
|
|
||||||
|
.L0: beq $7,$0,.Lend # nothing left for the unrolled loop
|
||||||
|
nop
|
||||||
|
|
||||||
|
.Loop: daddiu $7,$7,-4 # four limbs per iteration
|
||||||
|
|
||||||
|
ld $12,8($5) # limb 0 of the group: subtract ($10,$11), preload ($12,$13)
|
||||||
|
daddu $11,$11,$2
|
||||||
|
ld $13,8($6)
|
||||||
|
sltu $8,$11,$2
|
||||||
|
dsubu $11,$10,$11
|
||||||
|
sltu $2,$10,$11
|
||||||
|
sd $11,0($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
ld $10,16($5) # limb 1 of the group: subtract ($12,$13), preload ($10,$11)
|
||||||
|
daddu $13,$13,$2
|
||||||
|
ld $11,16($6)
|
||||||
|
sltu $8,$13,$2
|
||||||
|
dsubu $13,$12,$13
|
||||||
|
sltu $2,$12,$13
|
||||||
|
sd $13,8($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
ld $12,24($5) # limb 2 of the group: subtract ($10,$11), preload ($12,$13)
|
||||||
|
daddu $11,$11,$2
|
||||||
|
ld $13,24($6)
|
||||||
|
sltu $8,$11,$2
|
||||||
|
dsubu $11,$10,$11
|
||||||
|
sltu $2,$10,$11
|
||||||
|
sd $11,16($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
ld $10,32($5) # limb 3 of the group: subtract ($12,$13), preload ($10,$11)
|
||||||
|
daddu $13,$13,$2
|
||||||
|
ld $11,32($6)
|
||||||
|
sltu $8,$13,$2
|
||||||
|
dsubu $13,$12,$13
|
||||||
|
sltu $2,$12,$13
|
||||||
|
sd $13,24($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
daddiu $5,$5,32
|
||||||
|
daddiu $6,$6,32
|
||||||
|
|
||||||
|
bne $7,$0,.Loop
|
||||||
|
daddiu $4,$4,32 # (delay slot) advance res_ptr
|
||||||
|
|
||||||
|
.Lend: daddu $11,$11,$2 # last limb pair, already loaded in ($10,$11)
|
||||||
|
sltu $8,$11,$2
|
||||||
|
dsubu $11,$10,$11
|
||||||
|
sltu $2,$10,$11
|
||||||
|
sd $11,0($4)
|
||||||
|
j $31
|
||||||
|
or $2,$2,$8 # (delay slot) $2 = borrow out of the last limb
|
||||||
|
|
||||||
|
.end __mpn_sub_n
|
96
sysdeps/mips/mips3/submul_1.s
Normal file
96
sysdeps/mips/mips3/submul_1.s
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
# MIPS3 __mpn_submul_1 -- Multiply a limb vector with a single limb and
|
||||||
|
# subtract the product from a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# s1_ptr $5
|
||||||
|
# size $6
|
||||||
|
# s2_limb $7
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 4
|
||||||
|
.globl __mpn_submul_1
|
||||||
|
.ent __mpn_submul_1
|
||||||
|
# Multiply-subtract on 64-bit limbs: for each of the `size` ($6) limbs at
# s1_ptr ($5), form the product with s2_limb ($7) using dmultu, add the
# running carry limb in $2 to its low limb, subtract that from the limb at
# res_ptr ($4), and store the difference back.  $2 holds the final
# carry/borrow limb when control leaves through `j $31`.
# The code is software-pipelined: every dmultu is issued one step ahead of
# the mflo/mfhi that consume it, hence the warm-up and cool-down phases
# around Loop.  Under `.set noreorder` the instruction after each branch or
# jump is its delay slot and executes on both paths.
__mpn_submul_1:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
# warm up phase 0
|
||||||
|
ld $8,0($5) # $8 = first s1 limb
|
||||||
|
|
||||||
|
# warm up phase 1
|
||||||
|
daddiu $5,$5,8 # advance s1_ptr to the second limb
|
||||||
|
dmultu $8,$7 # start the first product
|
||||||
|
|
||||||
|
daddiu $6,$6,-1
|
||||||
|
beq $6,$0,$LC0 # size was 1: only cool down phase 0 is needed
|
||||||
|
move $2,$0 # zero cy2 (branch delay slot)
|
||||||
|
|
||||||
|
daddiu $6,$6,-1
|
||||||
|
beq $6,$0,$LC1 # size was 2: skip the loop
|
||||||
|
ld $8,0($5) # load new s1 limb as early as possible
|
||||||
|
|
||||||
|
Loop: ld $10,0($4) # $10 = current res limb
|
||||||
|
mflo $3 # $3 = low limb of the pending product
|
||||||
|
mfhi $9 # $9 = high limb of the pending product
|
||||||
|
daddiu $5,$5,8
|
||||||
|
daddu $3,$3,$2 # add old carry limb to low product limb
|
||||||
|
dmultu $8,$7 # start the next product
|
||||||
|
ld $8,0($5) # load new s1 limb as early as possible
|
||||||
|
daddiu $6,$6,-1 # decrement loop counter
|
||||||
|
sltu $2,$3,$2 # carry from previous addition -> $2
|
||||||
|
dsubu $3,$10,$3 # res limb - (low product limb + carry)
|
||||||
|
sgtu $10,$3,$10 # borrow from the subtraction (difference > res limb) -> $10
|
||||||
|
daddu $2,$2,$10 # sum of the carry and the borrow
|
||||||
|
sd $3,0($4) # store the result limb
|
||||||
|
daddiu $4,$4,8
|
||||||
|
bne $6,$0,Loop # should be "bnel"
|
||||||
|
daddu $2,$9,$2 # add high product limb to the carry and borrow
|
||||||
|
|
||||||
|
# cool down phase 1
|
||||||
|
$LC1: ld $10,0($4) # same step as the loop body, but without loading a further s1 limb
|
||||||
|
mflo $3
|
||||||
|
mfhi $9
|
||||||
|
daddu $3,$3,$2
|
||||||
|
sltu $2,$3,$2
|
||||||
|
dmultu $8,$7 # start the last product
|
||||||
|
dsubu $3,$10,$3
|
||||||
|
sgtu $10,$3,$10
|
||||||
|
daddu $2,$2,$10
|
||||||
|
sd $3,0($4)
|
||||||
|
daddiu $4,$4,8
|
||||||
|
daddu $2,$9,$2 # add high product limb to the carry and borrow
|
||||||
|
|
||||||
|
# cool down phase 0
|
||||||
|
$LC0: ld $10,0($4) # last res limb; no further multiply is started
|
||||||
|
mflo $3
|
||||||
|
mfhi $9
|
||||||
|
daddu $3,$3,$2
|
||||||
|
sltu $2,$3,$2
|
||||||
|
dsubu $3,$10,$3
|
||||||
|
sgtu $10,$3,$10
|
||||||
|
daddu $2,$2,$10
|
||||||
|
sd $3,0($4)
|
||||||
|
j $31
|
||||||
|
daddu $2,$9,$2 # add high product limb to the carry and borrow
|
||||||
|
|
||||||
|
.end __mpn_submul_1
|
84
sysdeps/mips/mul_1.s
Normal file
84
sysdeps/mips/mul_1.s
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
# MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
||||||
|
# store the product in a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# s1_ptr $5
|
||||||
|
# size $6
|
||||||
|
# s2_limb $7
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 4
|
||||||
|
.globl __mpn_mul_1
|
||||||
|
.ent __mpn_mul_1
|
||||||
|
# Multiply: for each of the `size` ($6) limbs at s1_ptr ($5), form the product
# with s2_limb ($7) using multu, add the running carry limb in $2 to its low
# limb, and store that at res_ptr ($4).  $2 holds the final carry limb when
# control leaves through `j $31`.
# The code is software-pipelined: every multu is issued one step ahead of the
# mflo/mfhi that consume it, hence the warm-up and cool-down phases around
# Loop.  Under `.set noreorder` the instruction after each branch or jump is
# its delay slot and executes on both paths.
__mpn_mul_1:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
# warm up phase 0
|
||||||
|
lw $8,0($5) # $8 = first s1 limb
|
||||||
|
|
||||||
|
# warm up phase 1
|
||||||
|
addiu $5,$5,4 # advance s1_ptr to the second limb
|
||||||
|
multu $8,$7 # start the first product
|
||||||
|
|
||||||
|
addiu $6,$6,-1
|
||||||
|
beq $6,$0,$LC0 # size was 1: only cool down phase 0 is needed
|
||||||
|
move $2,$0 # zero cy2 (branch delay slot)
|
||||||
|
|
||||||
|
addiu $6,$6,-1
|
||||||
|
beq $6,$0,$LC1 # size was 2: skip the loop
|
||||||
|
lw $8,0($5) # load new s1 limb as early as possible
|
||||||
|
|
||||||
|
Loop: mflo $10 # $10 = low limb of the pending product
|
||||||
|
mfhi $9 # $9 = high limb of the pending product
|
||||||
|
addiu $5,$5,4
|
||||||
|
addu $10,$10,$2 # add old carry limb to low product limb
|
||||||
|
multu $8,$7 # start the next product
|
||||||
|
lw $8,0($5) # load new s1 limb as early as possible
|
||||||
|
addiu $6,$6,-1 # decrement loop counter
|
||||||
|
sltu $2,$10,$2 # carry from previous addition -> $2
|
||||||
|
sw $10,0($4) # store the result limb
|
||||||
|
addiu $4,$4,4
|
||||||
|
bne $6,$0,Loop # should be "bnel"
|
||||||
|
addu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
# cool down phase 1
|
||||||
|
$LC1: mflo $10 # same step as the loop body, but without loading a further s1 limb
|
||||||
|
mfhi $9
|
||||||
|
addu $10,$10,$2
|
||||||
|
sltu $2,$10,$2
|
||||||
|
multu $8,$7 # start the last product
|
||||||
|
sw $10,0($4)
|
||||||
|
addiu $4,$4,4
|
||||||
|
addu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
# cool down phase 0
|
||||||
|
$LC0: mflo $10 # last product; no further multiply is started
|
||||||
|
mfhi $9
|
||||||
|
addu $10,$10,$2
|
||||||
|
sltu $2,$10,$2
|
||||||
|
sw $10,0($4)
|
||||||
|
j $31
|
||||||
|
addu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
.end __mpn_mul_1
|
91
sysdeps/mips/rshift.s
Normal file
91
sysdeps/mips/rshift.s
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
# MIPS2 __mpn_rshift -- Shift a limb vector right by cnt bits and store the
# result in a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# src_ptr $5
|
||||||
|
# size $6
|
||||||
|
# cnt $7
|
||||||
|
|
||||||
|
# __mpn_rshift (MIPS): shift the size-limb vector at src_ptr ($5) right by
# cnt ($7) bits, writing size limbs to res_ptr ($4).  $2 is set from the
# first limb loaded (0($5)) shifted left by $13 = -cnt; $13 is used as the
# complementary left-shift amount throughout.
# NOTE(review): this relies on register-count shifts using only the low five
# bits of the count, so that -cnt acts as 32-cnt -- confirm against the ISA.
# Structure: .Loop0 handles (size-1) mod 4 limbs one at a time, .Loop handles
# the remaining limbs four per iteration, and .Lend writes the last limb.
# Because of ".set noreorder", the instruction after each branch/jump is its
# delay slot and always executes.
.text
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_rshift
|
||||||
|
.ent __mpn_rshift
|
||||||
|
__mpn_rshift:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
lw $10,0($5) # load first limb
|
||||||
|
subu $13,$0,$7
|
||||||
|
# $6 = size-1: number of limbs that need a following limb to be merged in
addiu $6,$6,-1
|
||||||
|
and $9,$6,4-1 # number of limbs in first loop
|
||||||
|
beq $9,$0,.L0 # if size-1 is a multiple of 4 limbs, skip first loop
|
||||||
|
sll $2,$10,$13 # compute function result
|
||||||
|
|
||||||
|
subu $6,$6,$9
|
||||||
|
|
||||||
|
# One limb per iteration: result = (current >> cnt) | (next << -cnt).
.Loop0: lw $3,4($5)
|
||||||
|
addiu $4,$4,4
|
||||||
|
addiu $5,$5,4
|
||||||
|
addiu $9,$9,-1
|
||||||
|
srl $11,$10,$7
|
||||||
|
sll $12,$3,$13
|
||||||
|
move $10,$3
|
||||||
|
or $8,$11,$12
|
||||||
|
bne $9,$0,.Loop0
|
||||||
|
sw $8,-4($4)
|
||||||
|
|
||||||
|
.L0: beq $6,$0,.Lend
|
||||||
|
nop
|
||||||
|
|
||||||
|
# Four limbs per iteration; $10 and $3 alternate as "current" and "next" limb.
.Loop: lw $3,4($5)
|
||||||
|
addiu $4,$4,16
|
||||||
|
addiu $6,$6,-4
|
||||||
|
srl $11,$10,$7
|
||||||
|
sll $12,$3,$13
|
||||||
|
|
||||||
|
lw $10,8($5)
|
||||||
|
srl $14,$3,$7
|
||||||
|
or $8,$11,$12
|
||||||
|
sw $8,-16($4)
|
||||||
|
sll $9,$10,$13
|
||||||
|
|
||||||
|
lw $3,12($5)
|
||||||
|
srl $11,$10,$7
|
||||||
|
or $8,$14,$9
|
||||||
|
sw $8,-12($4)
|
||||||
|
sll $12,$3,$13
|
||||||
|
|
||||||
|
lw $10,16($5)
|
||||||
|
srl $14,$3,$7
|
||||||
|
or $8,$11,$12
|
||||||
|
sw $8,-8($4)
|
||||||
|
sll $9,$10,$13
|
||||||
|
|
||||||
|
addiu $5,$5,16
|
||||||
|
or $8,$14,$9
|
||||||
|
bgtz $6,.Loop
|
||||||
|
sw $8,-4($4)
|
||||||
|
|
||||||
|
# Last limb: nothing follows it, so only the right-shifted part is stored.
.Lend: srl $8,$10,$7
|
||||||
|
j $31
|
||||||
|
sw $8,0($4)
|
||||||
|
.end __mpn_rshift
|
119
sysdeps/mips/sub_n.s
Normal file
119
sysdeps/mips/sub_n.s
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
# MIPS2 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||||
|
# store difference in a third limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# s1_ptr $5
|
||||||
|
# s2_ptr $6
|
||||||
|
# size $7
|
||||||
|
|
||||||
|
# __mpn_sub_n (MIPS): res_ptr[i] = s1_ptr[i] - s2_ptr[i] - borrow for each of
# the size limbs.  $2 carries the borrow (0 or 1) between limbs and holds the
# final borrow when the routine jumps back through $31.
# Per limb: the incoming borrow is first added to the s2 limb (carry out of
# that add -> $8), the sum is subtracted from the s1 limb (borrow out -> $2),
# and the two flags are OR-ed to form the new borrow.
# Structure: .Loop0 handles (size-1) mod 4 limbs, .Loop handles four limbs per
# iteration, .Lend handles the last limb.  Limbs are loaded one step ahead of
# their use.  Because of ".set noreorder", the instruction after each
# branch/jump is its delay slot and always executes.
.text
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_sub_n
|
||||||
|
.ent __mpn_sub_n
|
||||||
|
__mpn_sub_n:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
lw $10,0($5)
|
||||||
|
lw $11,0($6)
|
||||||
|
|
||||||
|
# $7 = size-1: the last limb is always handled at .Lend
addiu $7,$7,-1
|
||||||
|
and $9,$7,4-1 # number of limbs in first loop
|
||||||
|
beq $9,$0,.L0 # if size-1 is a multiple of 4 limbs, skip first loop
|
||||||
|
move $2,$0
|
||||||
|
|
||||||
|
subu $7,$7,$9
|
||||||
|
|
||||||
|
.Loop0: addiu $9,$9,-1
|
||||||
|
lw $12,4($5)
|
||||||
|
addu $11,$11,$2
|
||||||
|
lw $13,4($6)
|
||||||
|
sltu $8,$11,$2
|
||||||
|
subu $11,$10,$11
|
||||||
|
sltu $2,$10,$11
|
||||||
|
sw $11,0($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
addiu $5,$5,4
|
||||||
|
addiu $6,$6,4
|
||||||
|
move $10,$12
|
||||||
|
move $11,$13
|
||||||
|
bne $9,$0,.Loop0
|
||||||
|
addiu $4,$4,4
|
||||||
|
|
||||||
|
.L0: beq $7,$0,.Lend
|
||||||
|
nop
|
||||||
|
|
||||||
|
# Four limbs per iteration; ($10,$11) and ($12,$13) alternate as the
# current (s1,s2) limb pair.
.Loop: addiu $7,$7,-4
|
||||||
|
|
||||||
|
lw $12,4($5)
|
||||||
|
addu $11,$11,$2
|
||||||
|
lw $13,4($6)
|
||||||
|
sltu $8,$11,$2
|
||||||
|
subu $11,$10,$11
|
||||||
|
sltu $2,$10,$11
|
||||||
|
sw $11,0($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
lw $10,8($5)
|
||||||
|
addu $13,$13,$2
|
||||||
|
lw $11,8($6)
|
||||||
|
sltu $8,$13,$2
|
||||||
|
subu $13,$12,$13
|
||||||
|
sltu $2,$12,$13
|
||||||
|
sw $13,4($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
lw $12,12($5)
|
||||||
|
addu $11,$11,$2
|
||||||
|
lw $13,12($6)
|
||||||
|
sltu $8,$11,$2
|
||||||
|
subu $11,$10,$11
|
||||||
|
sltu $2,$10,$11
|
||||||
|
sw $11,8($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
lw $10,16($5)
|
||||||
|
addu $13,$13,$2
|
||||||
|
lw $11,16($6)
|
||||||
|
sltu $8,$13,$2
|
||||||
|
subu $13,$12,$13
|
||||||
|
sltu $2,$12,$13
|
||||||
|
sw $13,12($4)
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
addiu $5,$5,16
|
||||||
|
addiu $6,$6,16
|
||||||
|
|
||||||
|
bne $7,$0,.Loop
|
||||||
|
addiu $4,$4,16
|
||||||
|
|
||||||
|
# Last limb: same subtract-with-borrow step, final borrow left in $2.
.Lend: addu $11,$11,$2
|
||||||
|
sltu $8,$11,$2
|
||||||
|
subu $11,$10,$11
|
||||||
|
sltu $2,$10,$11
|
||||||
|
sw $11,0($4)
|
||||||
|
j $31
|
||||||
|
or $2,$2,$8
|
||||||
|
|
||||||
|
.end __mpn_sub_n
|
96
sysdeps/mips/submul_1.s
Normal file
96
sysdeps/mips/submul_1.s
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
# MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and
|
||||||
|
# subtract the product from a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $4
|
||||||
|
# s1_ptr $5
|
||||||
|
# size $6
|
||||||
|
# s2_limb $7
|
||||||
|
|
||||||
|
# __mpn_submul_1 (MIPS): multiply the size-limb vector at s1_ptr ($5) by
# s2_limb ($7) and subtract the product from the limbs at res_ptr ($4) in
# place.  $2 holds the carry limb between steps; its final value is what is
# left in $2 when the routine jumps back through $31.
# Per step: $3 = low product + old carry limb (carry -> $2), then
# $3 = res limb - $3; "sgtu" flags a borrow when the difference is larger
# than the original res limb, and that borrow is added to $2 before the high
# product limb is.
# The code is software-pipelined like a multiply-only loop: each multu is
# issued one step before its HI/LO result is read.  Because of
# ".set noreorder", the instruction after each branch/jump is its delay slot
# and always executes.
.text
|
||||||
|
.align 4
|
||||||
|
.globl __mpn_submul_1
|
||||||
|
.ent __mpn_submul_1
|
||||||
|
__mpn_submul_1:
|
||||||
|
.set noreorder
|
||||||
|
.set nomacro
|
||||||
|
|
||||||
|
# warm up phase 0
|
||||||
|
lw $8,0($5)
|
||||||
|
|
||||||
|
# warm up phase 1
|
||||||
|
addiu $5,$5,4
|
||||||
|
multu $8,$7
|
||||||
|
|
||||||
|
addiu $6,$6,-1
|
||||||
|
# size == 1: only the product already in flight remains
beq $6,$0,$LC0
|
||||||
|
move $2,$0 # zero cy2 (delay slot)
|
||||||
|
|
||||||
|
addiu $6,$6,-1
|
||||||
|
# size == 2: skip the steady-state loop
beq $6,$0,$LC1
|
||||||
|
lw $8,0($5) # load new s1 limb as early as possible
|
||||||
|
|
||||||
|
Loop: lw $10,0($4)
|
||||||
|
mflo $3
|
||||||
|
mfhi $9
|
||||||
|
addiu $5,$5,4
|
||||||
|
addu $3,$3,$2 # add old carry limb to low product limb
|
||||||
|
multu $8,$7
|
||||||
|
lw $8,0($5) # load new s1 limb as early as possible
|
||||||
|
addiu $6,$6,-1 # decrement loop counter
|
||||||
|
sltu $2,$3,$2 # carry from previous addition -> $2
|
||||||
|
subu $3,$10,$3
|
||||||
|
sgtu $10,$3,$10
|
||||||
|
addu $2,$2,$10
|
||||||
|
sw $3,0($4)
|
||||||
|
addiu $4,$4,4
|
||||||
|
bne $6,$0,Loop # should be "bnel"
|
||||||
|
addu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
# cool down phase 1
|
||||||
|
# One product is pending in HI/LO and the last s1 limb is already in $8.
$LC1: lw $10,0($4)
|
||||||
|
mflo $3
|
||||||
|
mfhi $9
|
||||||
|
addu $3,$3,$2
|
||||||
|
sltu $2,$3,$2
|
||||||
|
multu $8,$7
|
||||||
|
subu $3,$10,$3
|
||||||
|
sgtu $10,$3,$10
|
||||||
|
addu $2,$2,$10
|
||||||
|
sw $3,0($4)
|
||||||
|
addiu $4,$4,4
|
||||||
|
addu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
# cool down phase 0
|
||||||
|
# Only the final product remains: update the last res limb, leave the carry limb in $2.
$LC0: lw $10,0($4)
|
||||||
|
mflo $3
|
||||||
|
mfhi $9
|
||||||
|
addu $3,$3,$2
|
||||||
|
sltu $2,$3,$2
|
||||||
|
subu $3,$10,$3
|
||||||
|
sgtu $10,$3,$10
|
||||||
|
addu $2,$2,$10
|
||||||
|
sw $3,0($4)
|
||||||
|
j $31
|
||||||
|
addu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
|
.end __mpn_submul_1
|
54
sysdeps/rs6000/add_n.s
Normal file
54
sysdeps/rs6000/add_n.s
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r3
|
||||||
|
# s1_ptr r4
|
||||||
|
# s2_ptr r5
|
||||||
|
# size r6
|
||||||
|
|
||||||
|
# __mpn_add_n (IBM POWER): add the size limbs at s1_ptr (r4) and s2_ptr (r5),
# store the sums through res_ptr (r3), and leave the final carry (0 or 1)
# in r3.
# __mpn_add_n in the [DS] csect is the three-word descriptor (code address,
# TOC anchor, 0); the code itself starts at .__mpn_add_n.
# The loop is pipelined: each result limb is stored one step after it is
# computed, so the last store happens at Lend.
.toc
|
||||||
|
.extern __mpn_add_n[DS]
|
||||||
|
.extern .__mpn_add_n
|
||||||
|
.csect [PR]
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_add_n
|
||||||
|
.globl .__mpn_add_n
|
||||||
|
.csect __mpn_add_n[DS]
|
||||||
|
__mpn_add_n:
|
||||||
|
.long .__mpn_add_n, TOC[tc0], 0
|
||||||
|
.csect [PR]
|
||||||
|
.__mpn_add_n:
|
||||||
|
mtctr 6 # copy size into CTR
|
||||||
|
l 8,0(4) # load least significant s1 limb
|
||||||
|
l 0,0(5) # load least significant s2 limb
|
||||||
|
cal 3,-4(3) # offset res_ptr, it's updated before used
|
||||||
|
a 7,0,8 # add least significant limbs, set cy
|
||||||
|
bdz Lend # If done, skip loop
|
||||||
|
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
|
||||||
|
lu 0,4(5) # load s2 limb and update s2_ptr
|
||||||
|
stu 7,4(3) # store previous limb in load latency slot
|
||||||
|
ae 7,0,8 # add new limbs with cy, set cy
|
||||||
|
bdn Loop # decrement CTR and loop back
|
||||||
|
Lend: st 7,4(3) # store ultimate result limb
|
||||||
|
lil 3,0 # load cy into ...
|
||||||
|
aze 3,3 # ... return value register
|
||||||
|
br
|
122
sysdeps/rs6000/addmul_1.s
Normal file
122
sysdeps/rs6000/addmul_1.s
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
# IBM POWER __mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
||||||
|
# the result to a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r3
|
||||||
|
# s1_ptr r4
|
||||||
|
# size r5
|
||||||
|
# s2_limb r6
|
||||||
|
|
||||||
|
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
|
||||||
|
# obtain that operation, we have to use the 32x32->64 signed multiplication
|
||||||
|
# instruction, and add the appropriate compensation to the high limb of the
|
||||||
|
# result. We add the multiplicand if the multiplier has its most significant
|
||||||
|
# bit set, and we add the multiplier if the multiplicand has its most
|
||||||
|
# significant bit set. We need to preserve the carry flag between each
|
||||||
|
# iteration, so we have to compute the compensation carefully (the natural,
|
||||||
|
# srai+and doesn't work). Since the POWER architecture has a branch unit
|
||||||
|
# we can branch in zero cycles, so that's how we perform the additions.
|
||||||
|
|
||||||
|
# __mpn_addmul_1 (IBM POWER): multiply the size limbs at s1_ptr (r4) by
# s2_limb (r6) and add the products into the limbs at res_ptr (r3) in place.
# On exit r3 = last carry limb (r9) plus the pending carry bit.
# __mpn_addmul_1 in the [DS] csect is the three-word descriptor (code
# address, TOC anchor, 0); the code starts at .__mpn_addmul_1.
# "cmpi 0,6,0" / "blt Lneg" selects one of two loops by the sign of s2_limb:
# Lploop when it is non-negative, Lnloop when it is negative (there the s1
# limb in r0 is also added into the high product word).  Inside each loop,
# "cmpi 0,0,0" tests the sign of the s1 limb just loaded, and the following
# "bge" skips the "cax ...,6" compensation when that limb is non-negative.
# Each loop is unrolled twice, alternating r9 and r10 as the carry limb;
# Lend0 copies r10 into r9 so that both exits share Lend.
.toc
|
||||||
|
.csect .__mpn_addmul_1[PR]
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_addmul_1
|
||||||
|
.globl .__mpn_addmul_1
|
||||||
|
.csect __mpn_addmul_1[DS]
|
||||||
|
__mpn_addmul_1:
|
||||||
|
.long .__mpn_addmul_1[PR], TOC[tc0], 0
|
||||||
|
.csect .__mpn_addmul_1[PR]
|
||||||
|
.__mpn_addmul_1:
|
||||||
|
|
||||||
|
# r3 is biased by -4 because every store/load of a result limb uses 4(3)
cal 3,-4(3)
|
||||||
|
l 0,0(4)
|
||||||
|
cmpi 0,6,0
|
||||||
|
mtctr 5
|
||||||
|
mul 9,0,6
|
||||||
|
# r7 = s2_limb if the first s1 limb is negative, else 0
srai 7,0,31
|
||||||
|
and 7,7,6
|
||||||
|
mfmq 8
|
||||||
|
cax 9,9,7
|
||||||
|
l 7,4(3)
|
||||||
|
a 8,8,7 # add res_limb
|
||||||
|
blt Lneg
|
||||||
|
Lpos: bdz Lend
|
||||||
|
|
||||||
|
Lploop: lu 0,4(4)
|
||||||
|
stu 8,4(3)
|
||||||
|
cmpi 0,0,0
|
||||||
|
mul 10,0,6
|
||||||
|
mfmq 0
|
||||||
|
ae 8,0,9 # low limb + old_cy_limb + old cy
|
||||||
|
l 7,4(3)
|
||||||
|
aze 10,10 # propagate cy to new cy_limb
|
||||||
|
a 8,8,7 # add res_limb
|
||||||
|
bge Lp0
|
||||||
|
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||||
|
Lp0: bdz Lend0
|
||||||
|
lu 0,4(4)
|
||||||
|
stu 8,4(3)
|
||||||
|
cmpi 0,0,0
|
||||||
|
mul 9,0,6
|
||||||
|
mfmq 0
|
||||||
|
ae 8,0,10
|
||||||
|
l 7,4(3)
|
||||||
|
aze 9,9
|
||||||
|
a 8,8,7
|
||||||
|
bge Lp1
|
||||||
|
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||||
|
Lp1: bdn Lploop
|
||||||
|
|
||||||
|
b Lend
|
||||||
|
|
||||||
|
# s2_limb is negative: add the first s1 limb (r0) to the high product word
Lneg: cax 9,9,0
|
||||||
|
bdz Lend
|
||||||
|
Lnloop: lu 0,4(4)
|
||||||
|
stu 8,4(3)
|
||||||
|
cmpi 0,0,0
|
||||||
|
mul 10,0,6
|
||||||
|
mfmq 7
|
||||||
|
ae 8,7,9
|
||||||
|
l 7,4(3)
|
||||||
|
ae 10,10,0 # propagate cy to new cy_limb (also adds the s1 limb in r0)
|
||||||
|
a 8,8,7 # add res_limb
|
||||||
|
bge Ln0
|
||||||
|
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||||
|
Ln0: bdz Lend0
|
||||||
|
lu 0,4(4)
|
||||||
|
stu 8,4(3)
|
||||||
|
cmpi 0,0,0
|
||||||
|
mul 9,0,6
|
||||||
|
mfmq 7
|
||||||
|
ae 8,7,10
|
||||||
|
l 7,4(3)
|
||||||
|
ae 9,9,0 # propagate cy to new cy_limb (also adds the s1 limb in r0)
|
||||||
|
a 8,8,7 # add res_limb
|
||||||
|
bge Ln1
|
||||||
|
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||||
|
Ln1: bdn Lnloop
|
||||||
|
b Lend
|
||||||
|
|
||||||
|
# Exit taken when the carry limb is in r10: move it to r9
Lend0: cal 9,0(10)
|
||||||
|
# Store the last result limb; r3 = carry limb + pending carry bit
Lend: st 8,4(3)
|
||||||
|
aze 3,9
|
||||||
|
br
|
58
sysdeps/rs6000/lshift.s
Normal file
58
sysdeps/rs6000/lshift.s
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
# IBM POWER __mpn_lshift -- Shift a limb vector left by cnt bits and store
# the result in a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r3
|
||||||
|
# s_ptr r4
|
||||||
|
# size r5
|
||||||
|
# cnt r6
|
||||||
|
|
||||||
|
# __mpn_lshift (IBM POWER): shift the size limbs at s_ptr (r4) left by cnt
# (r6) bits and store the result at res_ptr (r3).  Limbs are processed from
# the most significant end downwards; r3 receives the carry-out limb
# computed from the most significant source limb.
# __mpn_lshift in the [DS] csect is the three-word descriptor (code address,
# TOC anchor, 0); the code starts at .__mpn_lshift.
.toc
|
||||||
|
.extern __mpn_lshift[DS]
|
||||||
|
.extern .__mpn_lshift
|
||||||
|
.csect [PR]
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_lshift
|
||||||
|
.globl .__mpn_lshift
|
||||||
|
.csect __mpn_lshift[DS]
|
||||||
|
__mpn_lshift:
|
||||||
|
.long .__mpn_lshift, TOC[tc0], 0
|
||||||
|
.csect [PR]
|
||||||
|
.__mpn_lshift:
|
||||||
|
sli 0,5,2 # r0 = size * 4 (vector length in bytes)
|
||||||
|
cax 9,3,0 # r9 = res_ptr + size*4 (just past the result vector)
|
||||||
|
cax 4,4,0 # r4 = s_ptr + size*4 (just past the source vector)
|
||||||
|
sfi 8,6,32 # r8 = 32 - cnt
|
||||||
|
mtctr 5 # put limb count in CTR loop register
|
||||||
|
lu 0,-4(4) # read most significant limb
|
||||||
|
sre 3,0,8 # compute carry out limb, and init MQ register
|
||||||
|
bdz Lend2 # if just one limb, skip loop
|
||||||
|
lu 0,-4(4) # read 2:nd most significant limb
|
||||||
|
sreq 7,0,8 # compute most significant limb of result
|
||||||
|
bdz Lend # if just two limb, skip loop
|
||||||
|
Loop: lu 0,-4(4) # load next lower limb
|
||||||
|
stu 7,-4(9) # store previous result during read latency
|
||||||
|
sreq 7,0,8 # compute result limb
|
||||||
|
bdn Loop # loop back until CTR is zero
|
||||||
|
Lend: stu 7,-4(9) # store 2:nd least significant limb
|
||||||
|
Lend2: sle 7,0,6 # compute least significant limb
|
||||||
|
st 7,-4(9) # store it
|
||||||
|
br
|
109
sysdeps/rs6000/mul_1.s
Normal file
109
sysdeps/rs6000/mul_1.s
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
# IBM POWER __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||||
|
# the result in a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r3
|
||||||
|
# s1_ptr r4
|
||||||
|
# size r5
|
||||||
|
# s2_limb r6
|
||||||
|
|
||||||
|
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
|
||||||
|
# obtain that operation, we have to use the 32x32->64 signed multiplication
|
||||||
|
# instruction, and add the appropriate compensation to the high limb of the
|
||||||
|
# result. We add the multiplicand if the multiplier has its most significant
|
||||||
|
# bit set, and we add the multiplier if the multiplicand has its most
|
||||||
|
# significant bit set. We need to preserve the carry flag between each
|
||||||
|
# iteration, so we have to compute the compensation carefully (the natural,
|
||||||
|
# srai+and doesn't work). Since the POWER architecture has a branch unit
|
||||||
|
# we can branch in zero cycles, so that's how we perform the additions.
|
||||||
|
|
||||||
|
# __mpn_mul_1 (IBM POWER): multiply the size limbs at s1_ptr (r4) by s2_limb
# (r6) and store the low result limbs at res_ptr (r3).  On exit r3 = last
# carry limb (r9) plus the pending carry bit.
# __mpn_mul_1 in the [DS] csect is the three-word descriptor (code address,
# TOC anchor, 0); the code starts at .__mpn_mul_1.
# "cmpi 0,6,0" / "blt Lneg" selects one of two loops by the sign of s2_limb:
# Lploop when it is non-negative, Lnloop when it is negative (there the s1
# limb in r0 is added to the high product word before mfmq overwrites r0).
# Inside each loop, "cmpi 0,0,0" tests the sign of the s1 limb just loaded,
# and the following "bge" skips the "cax ...,6" compensation when that limb
# is non-negative.  Each loop is unrolled twice, alternating r9 and r10 as
# the carry limb; Lend0 copies r10 into r9 so that both exits share Lend.
.toc
|
||||||
|
.csect .__mpn_mul_1[PR]
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_mul_1
|
||||||
|
.globl .__mpn_mul_1
|
||||||
|
.csect __mpn_mul_1[DS]
|
||||||
|
__mpn_mul_1:
|
||||||
|
.long .__mpn_mul_1[PR], TOC[tc0], 0
|
||||||
|
.csect .__mpn_mul_1[PR]
|
||||||
|
.__mpn_mul_1:
|
||||||
|
|
||||||
|
# r3 is biased by -4 because every store of a result limb uses 4(3)
cal 3,-4(3)
|
||||||
|
l 0,0(4)
|
||||||
|
cmpi 0,6,0
|
||||||
|
mtctr 5
|
||||||
|
mul 9,0,6
|
||||||
|
# r7 = s2_limb if the first s1 limb is negative, else 0
srai 7,0,31
|
||||||
|
and 7,7,6
|
||||||
|
mfmq 8
|
||||||
|
ai 0,0,0 # reset carry
|
||||||
|
cax 9,9,7
|
||||||
|
blt Lneg
|
||||||
|
Lpos: bdz Lend
|
||||||
|
Lploop: lu 0,4(4)
|
||||||
|
stu 8,4(3)
|
||||||
|
cmpi 0,0,0
|
||||||
|
mul 10,0,6
|
||||||
|
mfmq 0
|
||||||
|
ae 8,0,9
|
||||||
|
bge Lp0
|
||||||
|
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||||
|
Lp0: bdz Lend0
|
||||||
|
lu 0,4(4)
|
||||||
|
stu 8,4(3)
|
||||||
|
cmpi 0,0,0
|
||||||
|
mul 9,0,6
|
||||||
|
mfmq 0
|
||||||
|
ae 8,0,10
|
||||||
|
bge Lp1
|
||||||
|
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||||
|
Lp1: bdn Lploop
|
||||||
|
b Lend
|
||||||
|
|
||||||
|
# s2_limb is negative: add the first s1 limb (r0) to the high product word
Lneg: cax 9,9,0
|
||||||
|
bdz Lend
|
||||||
|
Lnloop: lu 0,4(4)
|
||||||
|
stu 8,4(3)
|
||||||
|
cmpi 0,0,0
|
||||||
|
mul 10,0,6
|
||||||
|
cax 10,10,0 # adjust high limb for negative s2_limb
|
||||||
|
mfmq 0
|
||||||
|
ae 8,0,9
|
||||||
|
bge Ln0
|
||||||
|
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||||
|
Ln0: bdz Lend0
|
||||||
|
lu 0,4(4)
|
||||||
|
stu 8,4(3)
|
||||||
|
cmpi 0,0,0
|
||||||
|
mul 9,0,6
|
||||||
|
cax 9,9,0 # adjust high limb for negative s2_limb
|
||||||
|
mfmq 0
|
||||||
|
ae 8,0,10
|
||||||
|
bge Ln1
|
||||||
|
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||||
|
Ln1: bdn Lnloop
|
||||||
|
b Lend
|
||||||
|
|
||||||
|
# Exit taken when the carry limb is in r10: move it to r9
Lend0: cal 9,0(10)
|
||||||
|
# Store the last result limb; r3 = carry limb + pending carry bit
Lend: st 8,4(3)
|
||||||
|
aze 3,9
|
||||||
|
br
|
56
sysdeps/rs6000/rshift.s
Normal file
56
sysdeps/rs6000/rshift.s
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
# IBM POWER __mpn_rshift -- Shift a limb vector right by cnt bits and store
# the result in a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r3
|
||||||
|
# s_ptr r4
|
||||||
|
# size r5
|
||||||
|
# cnt r6
|
||||||
|
|
||||||
|
# __mpn_rshift (IBM POWER): shift the size limbs at s_ptr (r4) right by cnt
# (r6) bits and store the result at res_ptr (r3).  Limbs are processed from
# the least significant end upwards; r3 receives the carry limb computed
# from the least significant source limb.
# __mpn_rshift in the [DS] csect is the three-word descriptor (code address,
# TOC anchor, 0); the code starts at .__mpn_rshift.
.toc
|
||||||
|
.extern __mpn_rshift[DS]
|
||||||
|
.extern .__mpn_rshift
|
||||||
|
.csect [PR]
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_rshift
|
||||||
|
.globl .__mpn_rshift
|
||||||
|
.csect __mpn_rshift[DS]
|
||||||
|
__mpn_rshift:
|
||||||
|
.long .__mpn_rshift, TOC[tc0], 0
|
||||||
|
.csect [PR]
|
||||||
|
.__mpn_rshift:
|
||||||
|
sfi 8,6,32 # r8 = 32 - cnt
|
||||||
|
mtctr 5 # put limb count in CTR loop register
|
||||||
|
l 0,0(4) # read least significant limb
|
||||||
|
ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s
|
||||||
|
sle 3,0,8 # compute carry limb, and init MQ register
|
||||||
|
bdz Lend2 # if just one limb, skip loop
|
||||||
|
lu 0,4(4) # read 2:nd least significant limb
|
||||||
|
sleq 7,0,8 # compute least significant limb of result
|
||||||
|
bdz Lend # if just two limb, skip loop
|
||||||
|
Loop: lu 0,4(4) # load next higher limb
|
||||||
|
stu 7,4(9) # store previous result during read latency
|
||||||
|
sleq 7,0,8 # compute result limb
|
||||||
|
bdn Loop # loop back until CTR is zero
|
||||||
|
Lend: stu 7,4(9) # store 2:nd most significant limb
|
||||||
|
Lend2: sre 7,0,6 # compute most significant limb
|
||||||
|
st 7,4(9) # store it
|
||||||
|
br
|
55
sysdeps/rs6000/sub_n.s
Normal file
55
sysdeps/rs6000/sub_n.s
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
# IBM POWER __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||||
|
# store difference in a third limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r3
|
||||||
|
# s1_ptr r4
|
||||||
|
# s2_ptr r5
|
||||||
|
# size r6
|
||||||
|
|
||||||
|
# __mpn_sub_n (IBM POWER): subtract the size limbs at s2_ptr (r5) from those
# at s1_ptr (r4), store the differences through res_ptr (r3), and leave the
# final borrow (0 or 1) in r3.
# __mpn_sub_n in the [DS] csect is the three-word descriptor (code address,
# TOC anchor, 0); the code itself starts at .__mpn_sub_n.
# The loop is pipelined: each result limb is stored one step after it is
# computed, so the last store happens at Lend.
.toc
|
||||||
|
.extern __mpn_sub_n[DS]
|
||||||
|
.extern .__mpn_sub_n
|
||||||
|
.csect [PR]
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_sub_n
|
||||||
|
.globl .__mpn_sub_n
|
||||||
|
.csect __mpn_sub_n[DS]
|
||||||
|
__mpn_sub_n:
|
||||||
|
.long .__mpn_sub_n, TOC[tc0], 0
|
||||||
|
.csect [PR]
|
||||||
|
.__mpn_sub_n:
|
||||||
|
mtctr 6 # copy size into CTR
|
||||||
|
l 8,0(4) # load least significant s1 limb
|
||||||
|
l 0,0(5) # load least significant s2 limb
|
||||||
|
cal 3,-4(3) # offset res_ptr, it's updated before used
|
||||||
|
sf 7,0,8 # subtract least significant limbs (s1 - s2), set cy
|
||||||
|
bdz Lend # If done, skip loop
|
||||||
|
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
|
||||||
|
lu 0,4(5) # load s2 limb and update s2_ptr
|
||||||
|
stu 7,4(3) # store previous limb in load latency slot
|
||||||
|
sfe 7,0,8 # subtract new limbs (s1 - s2) with cy, set cy
|
||||||
|
bdn Loop # decrement CTR and loop back
|
||||||
|
Lend: st 7,4(3) # store ultimate result limb
|
||||||
|
sfe 3,0,0 # r3 = cy - 1, i.e. 0 (cy set) or -1 (cy clear) ...
|
||||||
|
sfi 3,3,0 # ... negated into the return value register: 0 or 1
|
||||||
|
br
|
127
sysdeps/rs6000/submul_1.s
Normal file
127
sysdeps/rs6000/submul_1.s
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
# IBM POWER __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
|
||||||
|
# the result from a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr r3
|
||||||
|
# s1_ptr r4
|
||||||
|
# size r5
|
||||||
|
# s2_limb r6
|
||||||
|
|
||||||
|
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
|
||||||
|
# obtain that operation, we have to use the 32x32->64 signed multiplication
|
||||||
|
# instruction, and add the appropriate compensation to the high limb of the
|
||||||
|
# result. We add the multiplicand if the multiplier has its most significant
|
||||||
|
# bit set, and we add the multiplier if the multiplicand has its most
|
||||||
|
# significant bit set. We need to preserve the carry flag between each
|
||||||
|
# iteration, so we have to compute the compensation carefully (the natural,
|
||||||
|
# srai+and doesn't work). Since the POWER architecture has a branch unit
|
||||||
|
# we can branch in zero cycles, so that's how we perform the additions.
|
||||||
|
|
||||||
|
# __mpn_submul_1 (IBM POWER): multiply the size limbs at s1_ptr (r4) by
# s2_limb (r6) and subtract the products from the limbs at res_ptr (r3) in
# place.  On exit r3 = last carry limb (r9) plus the pending carry bit.
# __mpn_submul_1 in the [DS] csect is the three-word descriptor (code
# address, TOC anchor, 0); the code starts at .__mpn_submul_1.
# Per limb: r11 = low product (+ old carry limb + old carry bit),
# "sf 8,11,7" forms r8 = res_limb - r11, and the dummy "a 11,8,11" re-adds
# the two so that the carry bit left behind has the sense the next
# "ae"/"aze" expects (the sum written to r11 is discarded).
# "cmpi 0,6,0" / "blt Lneg" selects one of two loops by the sign of s2_limb:
# Lploop when it is non-negative, Lnloop when it is negative (there the s1
# limb in r0 is also added into the high product word).  Inside each loop,
# "cmpi 0,0,0" tests the sign of the s1 limb just loaded, and the following
# "bge" skips the "cax ...,6" compensation when that limb is non-negative.
# Each loop is unrolled twice, alternating r9 and r10 as the carry limb;
# Lend0 copies r10 into r9 so that both exits share Lend.
.toc
|
||||||
|
.csect .__mpn_submul_1[PR]
|
||||||
|
.align 2
|
||||||
|
.globl __mpn_submul_1
|
||||||
|
.globl .__mpn_submul_1
|
||||||
|
.csect __mpn_submul_1[DS]
|
||||||
|
__mpn_submul_1:
|
||||||
|
.long .__mpn_submul_1[PR], TOC[tc0], 0
|
||||||
|
.csect .__mpn_submul_1[PR]
|
||||||
|
.__mpn_submul_1:
|
||||||
|
|
||||||
|
# r3 is biased by -4 because every store/load of a result limb uses 4(3)
cal 3,-4(3)
|
||||||
|
l 0,0(4)
|
||||||
|
cmpi 0,6,0
|
||||||
|
mtctr 5
|
||||||
|
mul 9,0,6
|
||||||
|
# r7 = s2_limb if the first s1 limb is negative, else 0
srai 7,0,31
|
||||||
|
and 7,7,6
|
||||||
|
mfmq 11
|
||||||
|
cax 9,9,7
|
||||||
|
l 7,4(3)
|
||||||
|
sf 8,11,7 # subtract low product limb from res_limb
|
||||||
|
a 11,8,11 # invert cy (r11 is junk)
|
||||||
|
blt Lneg
|
||||||
|
Lpos: bdz Lend
|
||||||
|
|
||||||
|
Lploop: lu 0,4(4)
|
||||||
|
stu 8,4(3)
|
||||||
|
cmpi 0,0,0
|
||||||
|
mul 10,0,6
|
||||||
|
mfmq 0
|
||||||
|
ae 11,0,9 # low limb + old_cy_limb + old cy
|
||||||
|
l 7,4(3)
|
||||||
|
aze 10,10 # propagate cy to new cy_limb
|
||||||
|
sf 8,11,7 # subtract from res_limb
|
||||||
|
a 11,8,11 # invert cy (r11 is junk)
|
||||||
|
bge Lp0
|
||||||
|
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||||
|
Lp0: bdz Lend0
|
||||||
|
lu 0,4(4)
|
||||||
|
stu 8,4(3)
|
||||||
|
cmpi 0,0,0
|
||||||
|
mul 9,0,6
|
||||||
|
mfmq 0
|
||||||
|
ae 11,0,10
|
||||||
|
l 7,4(3)
|
||||||
|
aze 9,9
|
||||||
|
sf 8,11,7
|
||||||
|
a 11,8,11 # invert cy (r11 is junk)
|
||||||
|
bge Lp1
|
||||||
|
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||||
|
Lp1: bdn Lploop
|
||||||
|
|
||||||
|
b Lend
|
||||||
|
|
||||||
|
# s2_limb is negative: add the first s1 limb (r0) to the high product word
Lneg: cax 9,9,0
|
||||||
|
bdz Lend
|
||||||
|
Lnloop: lu 0,4(4)
|
||||||
|
stu 8,4(3)
|
||||||
|
cmpi 0,0,0
|
||||||
|
mul 10,0,6
|
||||||
|
mfmq 7
|
||||||
|
ae 11,7,9
|
||||||
|
l 7,4(3)
|
||||||
|
ae 10,10,0 # propagate cy to new cy_limb (also adds the s1 limb in r0)
|
||||||
|
sf 8,11,7 # subtract from res_limb
|
||||||
|
a 11,8,11 # invert cy (r11 is junk)
|
||||||
|
bge Ln0
|
||||||
|
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||||
|
Ln0: bdz Lend0
|
||||||
|
lu 0,4(4)
|
||||||
|
stu 8,4(3)
|
||||||
|
cmpi 0,0,0
|
||||||
|
mul 9,0,6
|
||||||
|
mfmq 7
|
||||||
|
ae 11,7,10
|
||||||
|
l 7,4(3)
|
||||||
|
ae 9,9,0 # propagate cy to new cy_limb (also adds the s1 limb in r0)
|
||||||
|
sf 8,11,7 # subtract from res_limb
|
||||||
|
a 11,8,11 # invert cy (r11 is junk)
|
||||||
|
bge Ln1
|
||||||
|
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||||
|
Ln1: bdn Lnloop
|
||||||
|
b Lend
|
||||||
|
|
||||||
|
# Exit taken when the carry limb is in r10: move it to r9
Lend0: cal 9,0(10)
|
||||||
|
# Store the last result limb; r3 = carry limb + pending carry bit
Lend: st 8,4(3)
|
||||||
|
aze 3,9
|
||||||
|
br
|
47
sysdeps/vax/add_n.s
Normal file
47
sysdeps/vax/add_n.s
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# VAX __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||||
|
# sum in a third limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr (ap + 4)
|
||||||
|
# s1_ptr (ap + 8)
|
||||||
|
# s2_ptr (ap + 12)
|
||||||
|
# size (ap + 16)
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 1
|
||||||
|
.globl ___mpn_add_n
|
||||||
|
# Adds the size-limb vectors at s1_ptr and s2_ptr, storing the sums at res_ptr.
# Register use: r0 = limbs remaining, r1 = s2_ptr, r2 = s1_ptr, r3 = res_ptr,
# r4 = working limb.  The carry flag links one adwc to the next across
# iterations, so nothing between two adwc's may alter it.  On exit r0 holds
# the final carry.
___mpn_add_n:
|
||||||
|
.word 0x0 # entry mask: no registers are saved
|
||||||
|
movl 16(ap),r0 # r0 = size (loop counter)
|
||||||
|
movl 12(ap),r1 # r1 = s2_ptr
|
||||||
|
movl 8(ap),r2 # r2 = s1_ptr
|
||||||
|
movl 4(ap),r3 # r3 = res_ptr
|
||||||
|
subl2 r4,r4 # r4 = 0; x - x produces no borrow, so carry starts clear
|
||||||
|
|
||||||
|
Loop:
|
||||||
|
movl (r2)+,r4 # r4 = next s1 limb, advance s1_ptr
|
||||||
|
adwc (r1)+,r4 # r4 += next s2 limb + carry, advance s2_ptr
|
||||||
|
movl r4,(r3)+ # store sum limb, advance res_ptr
|
||||||
|
jsobgtr r0,Loop # decrement r0, loop while it is > 0
|
||||||
|
|
||||||
|
adwc r0,r0 # r0 is 0 after the loop (for size > 0), so r0 = carry out
|
||||||
|
ret
|
125
sysdeps/vax/addmul_1.s
Normal file
125
sysdeps/vax/addmul_1.s
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
# VAX __mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
||||||
|
# the result to a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr (ap + 4)
|
||||||
|
# s1_ptr (ap + 8)
|
||||||
|
# size (ap + 12)
|
||||||
|
# s2_limb (ap + 16)
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 1
|
||||||
|
.globl ___mpn_addmul_1
|
||||||
|
# res_ptr[i] += s1_ptr[i] * s2_limb for size limbs; the final carry limb is
# returned in r0.  emul is a signed multiply, so to obtain the unsigned
# product the high limb gets s2_limb added when the s1 limb has its sign bit
# set, and the s1 limb added when s2_limb has its sign bit set.
# Each loop pass handles two limbs, alternating r3:r2 and r11:r10 as the
# product registers so that one product's high limb is the next limb's
# carry-in.  Register use: r4 = size, r6 = s2_limb, r7 = pass counter,
# r8 = s1_ptr, r9 = res_ptr, r1 = current s1 limb.
___mpn_addmul_1:
|
||||||
|
.word 0xfc0 # entry mask: save r6-r11
|
||||||
|
movl 12(ap),r4 # r4 = size
|
||||||
|
movl 8(ap),r8 # r8 = s1_ptr
|
||||||
|
movl 4(ap),r9 # r9 = res_ptr
|
||||||
|
movl 16(ap),r6 # r6 = s2_limb; condition codes now reflect its sign
|
||||||
|
jlss s2_big # sign bit set: s2_limb >= 0x80000000
|
||||||
|
|
||||||
|
clrl r3 # carry-in used when entering at L1 (odd size)
|
||||||
|
incl r4
|
||||||
|
ashl $-1,r4,r7 # r7 = (size + 1) >> 1 = number of loop passes
|
||||||
|
jlbc r4,L1 # size + 1 even, i.e. size odd: start at the second half
|
||||||
|
clrl r11 # carry-in used when entering at Loop1 (even size)
|
||||||
|
|
||||||
|
# Loop for S2_LIMB < 0x80000000
|
||||||
|
Loop1: movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jlss L1n0 # limb has its sign bit set: take the corrected path
|
||||||
|
emul r1,r6,$0,r2 # r3:r2 = signed r1 * r6
|
||||||
|
addl2 r11,r2 # low limb += carry limb from the previous product
|
||||||
|
adwc $0,r3 # propagate carry into the high limb
|
||||||
|
addl2 r2,(r9)+ # add low limb into the result vector
|
||||||
|
adwc $0,r3 # propagate carry into the high limb
|
||||||
|
L1: movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jlss L1n1 # limb has its sign bit set: take the corrected path
|
||||||
|
L1p1: emul r1,r6,$0,r10 # r11:r10 = signed r1 * r6
|
||||||
|
addl2 r3,r10 # low limb += carry limb from the previous product
|
||||||
|
adwc $0,r11
|
||||||
|
addl2 r10,(r9)+
|
||||||
|
adwc $0,r11
|
||||||
|
|
||||||
|
jsobgtr r7,Loop1 # decrement pass counter, loop while > 0
|
||||||
|
movl r11,r0 # return the final carry limb
|
||||||
|
ret
|
||||||
|
|
||||||
|
L1n0: emul r1,r6,$0,r2 # s1 limb negative as a signed value
|
||||||
|
addl2 r11,r2
|
||||||
|
adwc r6,r3 # high limb += s2_limb (sign correction) + carry
|
||||||
|
addl2 r2,(r9)+
|
||||||
|
adwc $0,r3
|
||||||
|
movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jgeq L1p1 # non-negative limb: rejoin the uncorrected path
|
||||||
|
L1n1: emul r1,r6,$0,r10 # s1 limb negative as a signed value
|
||||||
|
addl2 r3,r10
|
||||||
|
adwc r6,r11 # high limb += s2_limb (sign correction) + carry
|
||||||
|
addl2 r10,(r9)+
|
||||||
|
adwc $0,r11
|
||||||
|
|
||||||
|
jsobgtr r7,Loop1
|
||||||
|
movl r11,r0 # return the final carry limb
|
||||||
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
s2_big: clrl r3 # same setup as above, for the s2_limb >= 0x80000000 loop
|
||||||
|
incl r4
|
||||||
|
ashl $-1,r4,r7 # r7 = (size + 1) >> 1 = number of loop passes
|
||||||
|
jlbc r4,L2 # size odd: start at the second half of the loop
|
||||||
|
clrl r11
|
||||||
|
|
||||||
|
# Loop for S2_LIMB >= 0x80000000
|
||||||
|
Loop2: movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jlss L2n0 # limb has its sign bit set: both corrections needed
|
||||||
|
emul r1,r6,$0,r2 # r3:r2 = signed r1 * r6
|
||||||
|
addl2 r11,r2
|
||||||
|
adwc r1,r3 # high limb += s1 limb (s2_limb is negative as signed) + carry
|
||||||
|
addl2 r2,(r9)+
|
||||||
|
adwc $0,r3
|
||||||
|
L2: movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jlss L2n1
|
||||||
|
L2p1: emul r1,r6,$0,r10 # r11:r10 = signed r1 * r6
|
||||||
|
addl2 r3,r10
|
||||||
|
adwc r1,r11 # high limb += s1 limb + carry
|
||||||
|
addl2 r10,(r9)+
|
||||||
|
adwc $0,r11
|
||||||
|
|
||||||
|
jsobgtr r7,Loop2
|
||||||
|
movl r11,r0 # return the final carry limb
|
||||||
|
ret
|
||||||
|
|
||||||
|
L2n0: emul r1,r6,$0,r2 # both operands negative as signed values
|
||||||
|
addl2 r11,r2
|
||||||
|
adwc r6,r3 # high limb += s2_limb + carry
|
||||||
|
addl2 r2,(r9)+
|
||||||
|
adwc r1,r3 # high limb += s1 limb + carry
|
||||||
|
movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jgeq L2p1
|
||||||
|
L2n1: emul r1,r6,$0,r10 # both operands negative as signed values
|
||||||
|
addl2 r3,r10
|
||||||
|
adwc r6,r11 # high limb += s2_limb + carry
|
||||||
|
addl2 r10,(r9)+
|
||||||
|
adwc r1,r11 # high limb += s1 limb + carry
|
||||||
|
|
||||||
|
jsobgtr r7,Loop2
|
||||||
|
movl r11,r0 # return the final carry limb
|
||||||
|
ret
|
122
sysdeps/vax/mul_1.s
Normal file
122
sysdeps/vax/mul_1.s
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
# VAX __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||||
|
# the result in a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr (ap + 4)
|
||||||
|
# s1_ptr (ap + 8)
|
||||||
|
# size (ap + 12)
|
||||||
|
# s2_limb (ap + 16)
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 1
|
||||||
|
.globl ___mpn_mul_1
|
||||||
|
# res_ptr[i] = low limb of (s1_ptr[i] * s2_limb + carry limb) for size limbs;
# the final carry limb is returned in r0.  emul is a signed multiply, so to
# obtain the unsigned product the high limb gets s2_limb added when the s1
# limb has its sign bit set, and the s1 limb added when s2_limb has its sign
# bit set.  Each loop pass handles two limbs, alternating r3:r2 and r11:r10
# as the product registers so that one product's high limb is the next
# limb's carry-in.  Register use: r4 = size, r6 = s2_limb, r7 = pass counter,
# r8 = s1_ptr, r9 = res_ptr, r1 = current s1 limb.
___mpn_mul_1:
|
||||||
|
.word 0xfc0 # entry mask: save r6-r11
|
||||||
|
movl 12(ap),r4 # r4 = size
|
||||||
|
movl 8(ap),r8 # r8 = s1_ptr
|
||||||
|
movl 4(ap),r9 # r9 = res_ptr
|
||||||
|
movl 16(ap),r6 # r6 = s2_limb; condition codes now reflect its sign
|
||||||
|
jlss s2_big # sign bit set: s2_limb >= 0x80000000
|
||||||
|
|
||||||
|
# One might want to combine the addl2 and the store below, but that
|
||||||
|
# is actually just slower according to my timing tests. (VAX 3600)
|
||||||
|
|
||||||
|
clrl r3 # carry-in used when entering at L1 (odd size)
|
||||||
|
incl r4
|
||||||
|
ashl $-1,r4,r7 # r7 = (size + 1) >> 1 = number of loop passes
|
||||||
|
jlbc r4,L1 # size + 1 even, i.e. size odd: start at the second half
|
||||||
|
clrl r11 # carry-in used when entering at Loop1 (even size)
|
||||||
|
|
||||||
|
# Loop for S2_LIMB < 0x80000000
|
||||||
|
Loop1: movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jlss L1n0 # limb has its sign bit set: take the corrected path
|
||||||
|
emul r1,r6,$0,r2 # r3:r2 = signed r1 * r6
|
||||||
|
addl2 r11,r2 # low limb += carry limb from the previous product
|
||||||
|
adwc $0,r3 # propagate carry into the high limb
|
||||||
|
movl r2,(r9)+ # store low limb
|
||||||
|
L1: movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jlss L1n1 # limb has its sign bit set: take the corrected path
|
||||||
|
L1p1: emul r1,r6,$0,r10 # r11:r10 = signed r1 * r6
|
||||||
|
addl2 r3,r10 # low limb += carry limb from the previous product
|
||||||
|
adwc $0,r11
|
||||||
|
movl r10,(r9)+ # store low limb
|
||||||
|
|
||||||
|
jsobgtr r7,Loop1 # decrement pass counter, loop while > 0
|
||||||
|
movl r11,r0 # return the final carry limb
|
||||||
|
ret
|
||||||
|
|
||||||
|
L1n0: emul r1,r6,$0,r2 # s1 limb negative as a signed value
|
||||||
|
addl2 r11,r2
|
||||||
|
adwc r6,r3 # high limb += s2_limb (sign correction) + carry
|
||||||
|
movl r2,(r9)+
|
||||||
|
movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jgeq L1p1 # non-negative limb: rejoin the uncorrected path
|
||||||
|
L1n1: emul r1,r6,$0,r10 # s1 limb negative as a signed value
|
||||||
|
addl2 r3,r10
|
||||||
|
adwc r6,r11 # high limb += s2_limb (sign correction) + carry
|
||||||
|
movl r10,(r9)+
|
||||||
|
|
||||||
|
jsobgtr r7,Loop1
|
||||||
|
movl r11,r0 # return the final carry limb
|
||||||
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
s2_big: clrl r3 # same setup as above, for the s2_limb >= 0x80000000 loop
|
||||||
|
incl r4
|
||||||
|
ashl $-1,r4,r7 # r7 = (size + 1) >> 1 = number of loop passes
|
||||||
|
jlbc r4,L2 # size odd: start at the second half of the loop
|
||||||
|
clrl r11
|
||||||
|
|
||||||
|
# Loop for S2_LIMB >= 0x80000000
|
||||||
|
Loop2: movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jlss L2n0 # limb has its sign bit set: both corrections needed
|
||||||
|
emul r1,r6,$0,r2 # r3:r2 = signed r1 * r6
|
||||||
|
addl2 r11,r2
|
||||||
|
adwc r1,r3 # high limb += s1 limb (s2_limb is negative as signed) + carry
|
||||||
|
movl r2,(r9)+
|
||||||
|
L2: movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jlss L2n1
|
||||||
|
L2p1: emul r1,r6,$0,r10 # r11:r10 = signed r1 * r6
|
||||||
|
addl2 r3,r10
|
||||||
|
adwc r1,r11 # high limb += s1 limb + carry
|
||||||
|
movl r10,(r9)+
|
||||||
|
|
||||||
|
jsobgtr r7,Loop2
|
||||||
|
movl r11,r0 # return the final carry limb
|
||||||
|
ret
|
||||||
|
|
||||||
|
L2n0: emul r1,r6,$0,r2 # both operands negative as signed values
|
||||||
|
addl2 r1,r3 # high limb += s1 limb; done first, before the carry-producing add
|
||||||
|
addl2 r11,r2
|
||||||
|
adwc r6,r3 # high limb += s2_limb + carry
|
||||||
|
movl r2,(r9)+
|
||||||
|
movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jgeq L2p1
|
||||||
|
L2n1: emul r1,r6,$0,r10 # both operands negative as signed values
|
||||||
|
addl2 r1,r11 # high limb += s1 limb; done first, before the carry-producing add
|
||||||
|
addl2 r3,r10
|
||||||
|
adwc r6,r11 # high limb += s2_limb + carry
|
||||||
|
movl r10,(r9)+
|
||||||
|
|
||||||
|
jsobgtr r7,Loop2
|
||||||
|
movl r11,r0 # return the final carry limb
|
||||||
|
ret
|
47
sysdeps/vax/sub_n.s
Normal file
47
sysdeps/vax/sub_n.s
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# VAX __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
|
||||||
|
# difference in a third limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr (ap + 4)
|
||||||
|
# s1_ptr (ap + 8)
|
||||||
|
# s2_ptr (ap + 12)
|
||||||
|
# size (ap + 16)
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 1
|
||||||
|
.globl ___mpn_sub_n
|
||||||
|
# Subtracts the size-limb vector at s2_ptr from the one at s1_ptr, storing
# the differences at res_ptr.
# Register use: r0 = limbs remaining, r1 = s2_ptr, r2 = s1_ptr, r3 = res_ptr,
# r4 = working limb.  The carry flag carries the borrow from one sbwc to the
# next across iterations, so nothing between two sbwc's may alter it.  On
# exit r0 holds the final borrow.
___mpn_sub_n:
|
||||||
|
.word 0x0 # entry mask: no registers are saved
|
||||||
|
movl 16(ap),r0 # r0 = size (loop counter)
|
||||||
|
movl 12(ap),r1 # r1 = s2_ptr
|
||||||
|
movl 8(ap),r2 # r2 = s1_ptr
|
||||||
|
movl 4(ap),r3 # r3 = res_ptr
|
||||||
|
subl2 r4,r4 # r4 = 0; x - x produces no borrow, so carry starts clear
|
||||||
|
|
||||||
|
Loop:
|
||||||
|
movl (r2)+,r4 # r4 = next s1 limb, advance s1_ptr
|
||||||
|
sbwc (r1)+,r4 # r4 -= next s2 limb + borrow, advance s2_ptr
|
||||||
|
movl r4,(r3)+ # store difference limb, advance res_ptr
|
||||||
|
jsobgtr r0,Loop # decrement r0, loop while it is > 0
|
||||||
|
|
||||||
|
adwc r0,r0 # r0 is 0 after the loop (for size > 0), so r0 = borrow out
|
||||||
|
ret
|
125
sysdeps/vax/submul_1.s
Normal file
125
sysdeps/vax/submul_1.s
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
# VAX __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
|
||||||
|
# the result from a second limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr (ap + 4)
|
||||||
|
# s1_ptr (ap + 8)
|
||||||
|
# size (ap + 12)
|
||||||
|
# s2_limb (ap + 16)
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 1
|
||||||
|
.globl ___mpn_submul_1
|
||||||
|
# res_ptr[i] -= s1_ptr[i] * s2_limb for size limbs; the final carry (borrow)
# limb is returned in r0.  emul is a signed multiply, so to obtain the
# unsigned product the high limb gets s2_limb added when the s1 limb has its
# sign bit set, and the s1 limb added when s2_limb has its sign bit set.
# Each loop pass handles two limbs, alternating r3:r2 and r11:r10 as the
# product registers so that one product's high limb is the next limb's
# carry-in.  Register use: r4 = size, r6 = s2_limb, r7 = pass counter,
# r8 = s1_ptr, r9 = res_ptr, r1 = current s1 limb.
___mpn_submul_1:
|
||||||
|
.word 0xfc0 # entry mask: save r6-r11
|
||||||
|
movl 12(ap),r4 # r4 = size
|
||||||
|
movl 8(ap),r8 # r8 = s1_ptr
|
||||||
|
movl 4(ap),r9 # r9 = res_ptr
|
||||||
|
movl 16(ap),r6 # r6 = s2_limb; condition codes now reflect its sign
|
||||||
|
jlss s2_big # sign bit set: s2_limb >= 0x80000000
|
||||||
|
|
||||||
|
clrl r3 # carry-in used when entering at L1 (odd size)
|
||||||
|
incl r4
|
||||||
|
ashl $-1,r4,r7 # r7 = (size + 1) >> 1 = number of loop passes
|
||||||
|
jlbc r4,L1 # size + 1 even, i.e. size odd: start at the second half
|
||||||
|
clrl r11 # carry-in used when entering at Loop1 (even size)
|
||||||
|
|
||||||
|
# Loop for S2_LIMB < 0x80000000
|
||||||
|
Loop1: movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jlss L1n0 # limb has its sign bit set: take the corrected path
|
||||||
|
emul r1,r6,$0,r2 # r3:r2 = signed r1 * r6
|
||||||
|
addl2 r11,r2 # low limb += carry limb from the previous product
|
||||||
|
adwc $0,r3 # propagate carry into the high limb
|
||||||
|
subl2 r2,(r9)+ # subtract low limb from the result vector
|
||||||
|
adwc $0,r3 # fold the borrow into the high limb
|
||||||
|
L1: movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jlss L1n1 # limb has its sign bit set: take the corrected path
|
||||||
|
L1p1: emul r1,r6,$0,r10 # r11:r10 = signed r1 * r6
|
||||||
|
addl2 r3,r10 # low limb += carry limb from the previous product
|
||||||
|
adwc $0,r11
|
||||||
|
subl2 r10,(r9)+
|
||||||
|
adwc $0,r11
|
||||||
|
|
||||||
|
jsobgtr r7,Loop1 # decrement pass counter, loop while > 0
|
||||||
|
movl r11,r0 # return the final carry limb
|
||||||
|
ret
|
||||||
|
|
||||||
|
L1n0: emul r1,r6,$0,r2 # s1 limb negative as a signed value
|
||||||
|
addl2 r11,r2
|
||||||
|
adwc r6,r3 # high limb += s2_limb (sign correction) + carry
|
||||||
|
subl2 r2,(r9)+
|
||||||
|
adwc $0,r3
|
||||||
|
movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jgeq L1p1 # non-negative limb: rejoin the uncorrected path
|
||||||
|
L1n1: emul r1,r6,$0,r10 # s1 limb negative as a signed value
|
||||||
|
addl2 r3,r10
|
||||||
|
adwc r6,r11 # high limb += s2_limb (sign correction) + carry
|
||||||
|
subl2 r10,(r9)+
|
||||||
|
adwc $0,r11
|
||||||
|
|
||||||
|
jsobgtr r7,Loop1
|
||||||
|
movl r11,r0 # return the final carry limb
|
||||||
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
s2_big: clrl r3 # same setup as above, for the s2_limb >= 0x80000000 loop
|
||||||
|
incl r4
|
||||||
|
ashl $-1,r4,r7 # r7 = (size + 1) >> 1 = number of loop passes
|
||||||
|
jlbc r4,L2 # size odd: start at the second half of the loop
|
||||||
|
clrl r11
|
||||||
|
|
||||||
|
# Loop for S2_LIMB >= 0x80000000
|
||||||
|
Loop2: movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jlss L2n0 # limb has its sign bit set: both corrections needed
|
||||||
|
emul r1,r6,$0,r2 # r3:r2 = signed r1 * r6
|
||||||
|
addl2 r11,r2
|
||||||
|
adwc r1,r3 # high limb += s1 limb (s2_limb is negative as signed) + carry
|
||||||
|
subl2 r2,(r9)+
|
||||||
|
adwc $0,r3
|
||||||
|
L2: movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jlss L2n1
|
||||||
|
L2p1: emul r1,r6,$0,r10 # r11:r10 = signed r1 * r6
|
||||||
|
addl2 r3,r10
|
||||||
|
adwc r1,r11 # high limb += s1 limb + carry
|
||||||
|
subl2 r10,(r9)+
|
||||||
|
adwc $0,r11
|
||||||
|
|
||||||
|
jsobgtr r7,Loop2
|
||||||
|
movl r11,r0 # return the final carry limb
|
||||||
|
ret
|
||||||
|
|
||||||
|
L2n0: emul r1,r6,$0,r2 # both operands negative as signed values
|
||||||
|
addl2 r11,r2
|
||||||
|
adwc r6,r3 # high limb += s2_limb + carry
|
||||||
|
subl2 r2,(r9)+
|
||||||
|
adwc r1,r3 # high limb += s1 limb + borrow
|
||||||
|
movl (r8)+,r1 # r1 = next s1 limb
|
||||||
|
jgeq L2p1
|
||||||
|
L2n1: emul r1,r6,$0,r10 # both operands negative as signed values
|
||||||
|
addl2 r3,r10
|
||||||
|
adwc r6,r11 # high limb += s2_limb + carry
|
||||||
|
subl2 r10,(r9)+
|
||||||
|
adwc r1,r11 # high limb += s1 limb + borrow
|
||||||
|
|
||||||
|
jsobgtr r7,Loop2
|
||||||
|
movl r11,r0 # return the final carry limb
|
||||||
|
ret
|
52
sysdeps/z8000/add_n.s
Normal file
52
sysdeps/z8000/add_n.s
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
! Z8000 __mpn_add_n -- Add two limb vectors of equal, non-zero length and
! store the sum in a third limb vector.
|
||||||
|
|
||||||
|
! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
! This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
! The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
! it under the terms of the GNU Library General Public License as published by
|
||||||
|
! the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
! option) any later version.
|
||||||
|
|
||||||
|
! The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
! License for more details.
|
||||||
|
|
||||||
|
! You should have received a copy of the GNU Library General Public License
|
||||||
|
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
! INPUT PARAMETERS
|
||||||
|
! res_ptr r7
|
||||||
|
! s1_ptr r6
|
||||||
|
! s2_ptr r5
|
||||||
|
! size r4
|
||||||
|
|
||||||
|
! If we are really crazy, we can use push to write a few result words
|
||||||
|
! backwards, using push just because it is faster than reg+disp. We'd
|
||||||
|
! then add 2x the number of words written to r7...
|
||||||
|
|
||||||
|
unseg
|
||||||
|
.text
|
||||||
|
even
|
||||||
|
global ___mpn_add_n
|
||||||
|
! Adds the r4-limb vectors at r6 and r5, storing the 16-bit sum limbs at r7.
! The first limb pair is added with a plain add; the remaining pairs use adc,
! so the carry flag must survive the pop, inc, ld and dec between two adc's
! (NOTE(review): confirm against the Z8000 flag tables).  The final carry is
! left in r2.
___mpn_add_n:
|
||||||
|
pop r0,@r6 ! r0 = first s1 limb, advance s1_ptr
|
||||||
|
pop r1,@r5 ! r1 = first s2 limb, advance s2_ptr
|
||||||
|
add r0,r1 ! first limb pair: no carry in
|
||||||
|
ld @r7,r0 ! store first sum limb
|
||||||
|
dec r4 ! one limb done
|
||||||
|
jr eq,Lend ! size was 1: nothing left to add
|
||||||
|
Loop: pop r0,@r6 ! r0 = next s1 limb
|
||||||
|
pop r1,@r5 ! r1 = next s2 limb
|
||||||
|
adc r0,r1 ! r0 += r1 + carry from the previous limb
|
||||||
|
inc r7,#2 ! advance res_ptr by one 2-byte limb
|
||||||
|
ld @r7,r0 ! store sum limb
|
||||||
|
dec r4
|
||||||
|
jr ne,Loop ! loop until all limbs are done
|
||||||
|
Lend: ld r2,r4 ! use 0 already in r4
|
||||||
|
adc r2,r2 ! r2 = 0 + 0 + carry = carry out
|
||||||
|
ret t
|
67
sysdeps/z8000/mul_1.s
Normal file
67
sysdeps/z8000/mul_1.s
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
! Z8000 __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||||
|
! the result in a second limb vector.
|
||||||
|
|
||||||
|
! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
! This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
! The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
! it under the terms of the GNU Library General Public License as published by
|
||||||
|
! the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
! option) any later version.
|
||||||
|
|
||||||
|
! The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
! License for more details.
|
||||||
|
|
||||||
|
! You should have received a copy of the GNU Library General Public License
|
||||||
|
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
! INPUT PARAMETERS
|
||||||
|
! res_ptr r7
|
||||||
|
! s1_ptr r6
|
||||||
|
! size r5
|
||||||
|
! s2_limb r4
|
||||||
|
|
||||||
|
unseg
|
||||||
|
.text
|
||||||
|
even
|
||||||
|
global ___mpn_mul_1
|
||||||
|
! res_ptr[i] = low word of (s1_ptr[i] * s2_limb + carry limb) for size limbs;
! the final carry limb is left in r2.  Register use: r7 = res_ptr,
! r6 = s1_ptr, r5 = size, r4 = s2_limb, r2 = carry limb, r1 = current s1
! limb, rr8 = product (r8 = high word, r9 = low word).
!
! mult is a signed multiply, so the high word is corrected to the unsigned
! product: the s1 limb is added when s2_limb has its msb set (Lneg loop
! only), and s2_limb is added when the s1 limb has its msb set.  The second
! correction must therefore be skipped when the s1 limb is non-negative,
! i.e. the branch around it is taken on pl, not mi.
___mpn_mul_1:
|
||||||
|
sub r2,r2 ! zero carry limb
|
||||||
|
and r4,r4 ! set the sign flag from s2_limb
|
||||||
|
jr mi,Lneg ! s2_limb has its msb set: use the Lneg loop
|
||||||
|
|
||||||
|
Lpos: pop r1,@r6 ! r1 = next s1 limb, advance s1_ptr
|
||||||
|
ld r9,r1 ! multiplicand goes in the low word of rr8
|
||||||
|
mult rr8,r4 ! rr8 = signed r9 * r4
|
||||||
|
and r1,r1 ! set the sign flag from the loaded limb
|
||||||
|
jr pl,Lp ! msb of loaded limb clear: no correction needed
|
||||||
|
add r8,r4 ! hi_limb += sign_comp2
|
||||||
|
Lp: add r9,r2 ! lo_limb += cy_limb
|
||||||
|
xor r2,r2 ! r2 = 0; the carry from the add above must survive this
|
||||||
|
adc r2,r8 ! new cy_limb = hi_limb + carry out of the low word
|
||||||
|
ld @r7,r9 ! store low word
|
||||||
|
inc r7,#2 ! advance res_ptr by one 2-byte limb
|
||||||
|
dec r5
|
||||||
|
jr ne,Lpos ! loop until all limbs are done
|
||||||
|
ret t
|
||||||
|
|
||||||
|
Lneg: pop r1,@r6 ! r1 = next s1 limb, advance s1_ptr
|
||||||
|
ld r9,r1 ! multiplicand goes in the low word of rr8
|
||||||
|
mult rr8,r4 ! rr8 = signed r9 * r4
|
||||||
|
add r8,r1 ! hi_limb += sign_comp1
|
||||||
|
and r1,r1 ! set the sign flag from the loaded limb
|
||||||
|
jr pl,Ln ! msb of loaded limb clear: no second correction needed
|
||||||
|
add r8,r4 ! hi_limb += sign_comp2
|
||||||
|
Ln: add r9,r2 ! lo_limb += cy_limb
|
||||||
|
xor r2,r2 ! r2 = 0; the carry from the add above must survive this
|
||||||
|
adc r2,r8 ! new cy_limb = hi_limb + carry out of the low word
|
||||||
|
ld @r7,r9 ! store low word
|
||||||
|
inc r7,#2 ! advance res_ptr by one 2-byte limb
|
||||||
|
dec r5
|
||||||
|
jr ne,Lneg ! loop until all limbs are done
|
||||||
|
ret t
|
53
sysdeps/z8000/sub_n.s
Normal file
53
sysdeps/z8000/sub_n.s
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
! Z8000 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||||
|
! store difference in a third limb vector.
|
||||||
|
|
||||||
|
! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
! This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
! The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
! it under the terms of the GNU Library General Public License as published by
|
||||||
|
! the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
! option) any later version.
|
||||||
|
|
||||||
|
! The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
! License for more details.
|
||||||
|
|
||||||
|
! You should have received a copy of the GNU Library General Public License
|
||||||
|
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
! INPUT PARAMETERS
|
||||||
|
! res_ptr r7
|
||||||
|
! s1_ptr r6
|
||||||
|
! s2_ptr r5
|
||||||
|
! size r4
|
||||||
|
|
||||||
|
! If we are really crazy, we can use push to write a few result words
|
||||||
|
! backwards, using push just because it is faster than reg+disp. We'd
|
||||||
|
! then add 2x the number of words written to r7...
|
||||||
|
|
||||||
|
unseg
|
||||||
|
.text
|
||||||
|
even
|
||||||
|
global ___mpn_sub_n
|
||||||
|
! Subtracts the r4-limb vector at r5 from the one at r6, storing the 16-bit
! difference limbs at r7.  The first limb pair uses a plain sub; the
! remaining pairs use sbc, so the carry (borrow) flag must survive the pop,
! inc, ld and dec between two sbc's (NOTE(review): confirm against the Z8000
! flag tables).  The final borrow is left in r2.
___mpn_sub_n:
|
||||||
|
pop r0,@r6 ! r0 = first s1 limb, advance s1_ptr
|
||||||
|
pop r1,@r5 ! r1 = first s2 limb, advance s2_ptr
|
||||||
|
sub r0,r1 ! first limb pair: no borrow in
|
||||||
|
ld @r7,r0 ! store first difference limb
|
||||||
|
dec r4 ! one limb done
|
||||||
|
jr eq,Lend ! size was 1: nothing left to subtract
|
||||||
|
Loop: pop r0,@r6 ! r0 = next s1 limb
|
||||||
|
pop r1,@r5 ! r1 = next s2 limb
|
||||||
|
sbc r0,r1 ! r0 -= r1 + borrow from the previous limb
|
||||||
|
inc r7,#2 ! advance res_ptr by one 2-byte limb
|
||||||
|
ld @r7,r0 ! store difference limb
|
||||||
|
dec r4
|
||||||
|
jr ne,Loop ! loop until all limbs are done
|
||||||
|
Lend: ld r2,r4 ! use 0 already in r4
|
||||||
|
adc r2,r2 ! r2 = 0 + 0 + carry flag = borrow out
|
||||||
|
ret t
|
Loading…
Reference in New Issue
Block a user