mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-14 17:11:06 +00:00
* sysdeps/mips/mips64/memcpy.S: Fix porting bug that broke
unaligned copying of 8-15 bytes. From Chris Demetriou <cgd@broadcom.com>. Fix label names.
* sysdeps/mips/mips64/memset.S: Fix label names. Make a change similar to the one in memcpy.S.
* sysdeps/mips/memcpy.S: Formatting changes.
* sysdeps/mips/memset.S: Likewise.
This commit is contained in:
parent
4208b5c771
commit
2b15a21180
@ -54,12 +54,14 @@ ENTRY (memcpy)
|
|||||||
SWHI t0, 0(a0)
|
SWHI t0, 0(a0)
|
||||||
addu a0, t1
|
addu a0, t1
|
||||||
|
|
||||||
L(chk8w): andi t0, a2, 0x1f # 32 or more bytes left?
|
L(chk8w):
|
||||||
|
andi t0, a2, 0x1f # 32 or more bytes left?
|
||||||
beq t0, a2, L(chk1w)
|
beq t0, a2, L(chk1w)
|
||||||
subu a3, a2, t0 # Yes
|
subu a3, a2, t0 # Yes
|
||||||
addu a3, a1 # a3 = end address of loop
|
addu a3, a1 # a3 = end address of loop
|
||||||
move a2, t0 # a2 = what will be left after loop
|
move a2, t0 # a2 = what will be left after loop
|
||||||
L(lop8w): lw t0, 0(a1) # Loop taking 8 words at a time
|
L(lop8w):
|
||||||
|
lw t0, 0(a1) # Loop taking 8 words at a time
|
||||||
lw t1, 4(a1)
|
lw t1, 4(a1)
|
||||||
lw t2, 8(a1)
|
lw t2, 8(a1)
|
||||||
lw t3, 12(a1)
|
lw t3, 12(a1)
|
||||||
@ -79,28 +81,34 @@ L(lop8w): lw t0, 0(a1) # Loop taking 8 words at a time
|
|||||||
bne a1, a3, L(lop8w)
|
bne a1, a3, L(lop8w)
|
||||||
sw t7, -4(a0)
|
sw t7, -4(a0)
|
||||||
|
|
||||||
L(chk1w): andi t0, a2, 0x3 # 4 or more bytes left?
|
L(chk1w):
|
||||||
|
andi t0, a2, 0x3 # 4 or more bytes left?
|
||||||
beq t0, a2, L(last8)
|
beq t0, a2, L(last8)
|
||||||
subu a3, a2, t0 # Yes, handle them one word at a time
|
subu a3, a2, t0 # Yes, handle them one word at a time
|
||||||
addu a3, a1 # a3 again end address
|
addu a3, a1 # a3 again end address
|
||||||
move a2, t0
|
move a2, t0
|
||||||
L(lop1w): lw t0, 0(a1)
|
L(lop1w):
|
||||||
|
lw t0, 0(a1)
|
||||||
addiu a0, 4
|
addiu a0, 4
|
||||||
addiu a1, 4
|
addiu a1, 4
|
||||||
bne a1, a3, L(lop1w)
|
bne a1, a3, L(lop1w)
|
||||||
sw t0, -4(a0)
|
sw t0, -4(a0)
|
||||||
|
|
||||||
L(last8): blez a2, L(lst8e) # Handle last 8 bytes, one at a time
|
L(last8):
|
||||||
|
blez a2, L(lst8e) # Handle last 8 bytes, one at a time
|
||||||
addu a3, a2, a1
|
addu a3, a2, a1
|
||||||
L(lst8l): lb t0, 0(a1)
|
L(lst8l):
|
||||||
|
lb t0, 0(a1)
|
||||||
addiu a0, 1
|
addiu a0, 1
|
||||||
addiu a1, 1
|
addiu a1, 1
|
||||||
bne a1, a3, L(lst8l)
|
bne a1, a3, L(lst8l)
|
||||||
sb t0, -1(a0)
|
sb t0, -1(a0)
|
||||||
L(lst8e): jr ra # Bye, bye
|
L(lst8e):
|
||||||
|
jr ra # Bye, bye
|
||||||
nop
|
nop
|
||||||
|
|
||||||
L(shift): subu a3, zero, a0 # Src and Dest unaligned
|
L(shift):
|
||||||
|
subu a3, zero, a0 # Src and Dest unaligned
|
||||||
andi a3, 0x3 # (unoptimized case...)
|
andi a3, 0x3 # (unoptimized case...)
|
||||||
beq a3, zero, L(shft1)
|
beq a3, zero, L(shft1)
|
||||||
subu a2, a3 # a2 = bytes left
|
subu a2, a3 # a2 = bytes left
|
||||||
@ -109,16 +117,18 @@ L(shift): subu a3, zero, a0 # Src and Dest unaligned
|
|||||||
addu a1, a3
|
addu a1, a3
|
||||||
SWHI t0, 0(a0)
|
SWHI t0, 0(a0)
|
||||||
addu a0, a3
|
addu a0, a3
|
||||||
L(shft1): andi t0, a2, 0x3
|
L(shft1):
|
||||||
|
andi t0, a2, 0x3
|
||||||
subu a3, a2, t0
|
subu a3, a2, t0
|
||||||
addu a3, a1
|
addu a3, a1
|
||||||
L(shfth): LWHI t1, 0(a1) # Limp through, word by word
|
L(shfth):
|
||||||
|
LWHI t1, 0(a1) # Limp through, word by word
|
||||||
LWLO t1, 3(a1)
|
LWLO t1, 3(a1)
|
||||||
addiu a0, 4
|
addiu a0, 4
|
||||||
addiu a1, 4
|
addiu a1, 4
|
||||||
bne a1, a3, L(shfth)
|
bne a1, a3, L(shfth)
|
||||||
sw t1, -4(a0)
|
sw t1, -4(a0)
|
||||||
b L(last8) # Handle anything which may be left
|
b L(last8) # Handle anything which may be left
|
||||||
move a2, t0
|
move a2, t0
|
||||||
|
|
||||||
.set reorder
|
.set reorder
|
||||||
|
@ -43,35 +43,42 @@ ENTRY (memset)
|
|||||||
sll t0, a1, 16
|
sll t0, a1, 16
|
||||||
or a1, t0 # a1 is now pattern in full word
|
or a1, t0 # a1 is now pattern in full word
|
||||||
|
|
||||||
L(ueven): subu t0, zero, a0 # Unaligned address?
|
L(ueven):
|
||||||
|
subu t0, zero, a0 # Unaligned address?
|
||||||
andi t0, 0x3
|
andi t0, 0x3
|
||||||
beq t0, zero, L(chkw)
|
beq t0, zero, L(chkw)
|
||||||
subu a2, t0
|
subu a2, t0
|
||||||
SWHI a1, 0(a0) # Yes, handle first unaligned part
|
SWHI a1, 0(a0) # Yes, handle first unaligned part
|
||||||
addu a0, t0 # Now both a0 and a2 are updated
|
addu a0, t0 # Now both a0 and a2 are updated
|
||||||
|
|
||||||
L(chkw): andi t0, a2, 0x7 # Enough left for one loop iteration?
|
L(chkw):
|
||||||
|
andi t0, a2, 0x7 # Enough left for one loop iteration?
|
||||||
beq t0, a2, L(chkl)
|
beq t0, a2, L(chkl)
|
||||||
subu a3, a2, t0
|
subu a3, a2, t0
|
||||||
addu a3, a0 # a3 is last loop address +1
|
addu a3, a0 # a3 is last loop address +1
|
||||||
move a2, t0 # a2 is now # of bytes left after loop
|
move a2, t0 # a2 is now # of bytes left after loop
|
||||||
L(loopw): addiu a0, 8 # Handle 2 words pr. iteration
|
L(loopw):
|
||||||
|
addiu a0, 8 # Handle 2 words pr. iteration
|
||||||
sw a1, -8(a0)
|
sw a1, -8(a0)
|
||||||
bne a0, a3, L(loopw)
|
bne a0, a3, L(loopw)
|
||||||
sw a1, -4(a0)
|
sw a1, -4(a0)
|
||||||
|
|
||||||
L(chkl): andi t0, a2, 0x4 # Check if there is at least a full
|
L(chkl):
|
||||||
|
andi t0, a2, 0x4 # Check if there is at least a full
|
||||||
beq t0, zero, L(last8) # word remaining after the loop
|
beq t0, zero, L(last8) # word remaining after the loop
|
||||||
subu a2, t0
|
subu a2, t0
|
||||||
sw a1, 0(a0) # Yes...
|
sw a1, 0(a0) # Yes...
|
||||||
addiu a0, 4
|
addiu a0, 4
|
||||||
|
|
||||||
L(last8): blez a2, L(exit) # Handle last 8 bytes (if cnt>0)
|
L(last8):
|
||||||
|
blez a2, L(exit) # Handle last 8 bytes (if cnt>0)
|
||||||
addu a3, a2, a0 # a3 is last address +1
|
addu a3, a2, a0 # a3 is last address +1
|
||||||
L(lst8l): addiu a0, 1
|
L(lst8l):
|
||||||
|
addiu a0, 1
|
||||||
bne a0, a3, L(lst8l)
|
bne a0, a3, L(lst8l)
|
||||||
sb a1, -1(a0)
|
sb a1, -1(a0)
|
||||||
L(exit): j ra # Bye, bye
|
L(exit):
|
||||||
|
j ra # Bye, bye
|
||||||
nop
|
nop
|
||||||
|
|
||||||
.set reorder
|
.set reorder
|
||||||
|
@ -42,8 +42,8 @@
|
|||||||
ENTRY (memcpy)
|
ENTRY (memcpy)
|
||||||
.set noreorder
|
.set noreorder
|
||||||
|
|
||||||
slti a4, a2, 8 # Less than 8?
|
slti a4, a2, 16 # Less than 16?
|
||||||
bne a4, zero, L(last8)
|
bne a4, zero, L(last16)
|
||||||
move v0, a0 # Setup exit value before too late
|
move v0, a0 # Setup exit value before too late
|
||||||
|
|
||||||
xor a4, a1, a0 # Find a0/a1 displacement
|
xor a4, a1, a0 # Find a0/a1 displacement
|
||||||
@ -86,9 +86,9 @@ L(lop8w):
|
|||||||
sd t7, -8(a0)
|
sd t7, -8(a0)
|
||||||
|
|
||||||
L(chk1w):
|
L(chk1w):
|
||||||
andi a4, a2, 0x7 # 4 or more bytes left?
|
andi a4, a2, 0x7 # 8 or more bytes left?
|
||||||
beq a4, a2, L(last8)
|
beq a4, a2, L(last16)
|
||||||
PTR_SUBU a3, a2, a4 # Yes, handle them one word at a time
|
PTR_SUBU a3, a2, a4 # Yes, handle them one dword at a time
|
||||||
PTR_ADDU a3, a1 # a3 again end address
|
PTR_ADDU a3, a1 # a3 again end address
|
||||||
move a2, a4
|
move a2, a4
|
||||||
L(lop1w):
|
L(lop1w):
|
||||||
@ -98,41 +98,41 @@ L(lop1w):
|
|||||||
bne a1, a3, L(lop1w)
|
bne a1, a3, L(lop1w)
|
||||||
sd a4, -8(a0)
|
sd a4, -8(a0)
|
||||||
|
|
||||||
L(last8):
|
L(last16):
|
||||||
blez a2, L(lst8e) # Handle last 8 bytes, one at a time
|
blez a2, L(lst16e) # Handle last 16 bytes, one at a time
|
||||||
PTR_ADDU a3, a2, a1
|
PTR_ADDU a3, a2, a1
|
||||||
L(lst8l):
|
L(lst16l):
|
||||||
lb a4, 0(a1)
|
lb a4, 0(a1)
|
||||||
PTR_ADDIU a0, 1
|
PTR_ADDIU a0, 1
|
||||||
PTR_ADDIU a1, 1
|
PTR_ADDIU a1, 1
|
||||||
bne a1, a3, L(lst8l)
|
bne a1, a3, L(lst16l)
|
||||||
sb a4, -1(a0)
|
sb a4, -1(a0)
|
||||||
L(lst8e):
|
L(lst16e):
|
||||||
jr ra # Bye, bye
|
jr ra # Bye, bye
|
||||||
nop
|
nop
|
||||||
|
|
||||||
L(shift):
|
L(shift):
|
||||||
PTR_SUBU a3, zero, a0 # Src and Dest unaligned
|
PTR_SUBU a3, zero, a0 # Src and Dest unaligned
|
||||||
andi a3, 0x7 # (unoptimized case...)
|
andi a3, 0x7 # (unoptimized case...)
|
||||||
beq a3, zero, L(shfa5)
|
beq a3, zero, L(shft1)
|
||||||
PTR_SUBU a2, a3 # a2 = bytes left
|
PTR_SUBU a2, a3 # a2 = bytes left
|
||||||
LDHI a4, 0(a1) # Take care of first odd part
|
LDHI a4, 0(a1) # Take care of first odd part
|
||||||
LDLO a4, 7(a1)
|
LDLO a4, 7(a1)
|
||||||
PTR_ADDU a1, a3
|
PTR_ADDU a1, a3
|
||||||
SDHI a4, 0(a0)
|
SDHI a4, 0(a0)
|
||||||
PTR_ADDU a0, a3
|
PTR_ADDU a0, a3
|
||||||
L(shfa5):
|
L(shft1):
|
||||||
andi a4, a2, 0x7
|
andi a4, a2, 0x7
|
||||||
PTR_SUBU a3, a2, a4
|
PTR_SUBU a3, a2, a4
|
||||||
PTR_ADDU a3, a1
|
PTR_ADDU a3, a1
|
||||||
L(shfth):
|
L(shfth):
|
||||||
LDHI a5, 0(a1) # Limp through, word by word
|
LDHI a5, 0(a1) # Limp through, dword by dword
|
||||||
LDLO a5, 7(a1)
|
LDLO a5, 7(a1)
|
||||||
PTR_ADDIU a0, 8
|
PTR_ADDIU a0, 8
|
||||||
PTR_ADDIU a1, 8
|
PTR_ADDIU a1, 8
|
||||||
bne a1, a3, L(shfth)
|
bne a1, a3, L(shfth)
|
||||||
sd a5, -8(a0)
|
sd a5, -8(a0)
|
||||||
b L(last8) # Handle anything which may be left
|
b L(last16) # Handle anything which may be left
|
||||||
move a2, a4
|
move a2, a4
|
||||||
|
|
||||||
.set reorder
|
.set reorder
|
||||||
|
@ -36,8 +36,8 @@
|
|||||||
ENTRY (memset)
|
ENTRY (memset)
|
||||||
.set noreorder
|
.set noreorder
|
||||||
|
|
||||||
slti t5, a2, 8 # Less than 8?
|
slti t5, a2, 16 # Less than 16?
|
||||||
bne t5, zero, L(last8)
|
bne t5, zero, L(last16)
|
||||||
move v0, a0 # Setup exit value before too late
|
move v0, a0 # Setup exit value before too late
|
||||||
|
|
||||||
beq a1, zero, L(ueven) # If zero pattern, no need to extend
|
beq a1, zero, L(ueven) # If zero pattern, no need to extend
|
||||||
@ -64,24 +64,24 @@ L(chkw):
|
|||||||
PTR_ADDU a3, a0 # a3 is last loop address +1
|
PTR_ADDU a3, a0 # a3 is last loop address +1
|
||||||
move a2, t4 # a2 is now # of bytes left after loop
|
move a2, t4 # a2 is now # of bytes left after loop
|
||||||
L(loopw):
|
L(loopw):
|
||||||
PTR_ADDIU a0, 16 # Handle 2 words pr. iteration
|
PTR_ADDIU a0, 16 # Handle 2 dwords pr. iteration
|
||||||
sd a1, -16(a0)
|
sd a1, -16(a0)
|
||||||
bne a0, a3, L(loopw)
|
bne a0, a3, L(loopw)
|
||||||
sd a1, -8(a0)
|
sd a1, -8(a0)
|
||||||
|
|
||||||
L(chkl):
|
L(chkl):
|
||||||
andi t4, a2, 0x8 # Check if there is at least a double
|
andi t4, a2, 0x8 # Check if there is at least a double
|
||||||
beq t4, zero, L(last8) # word remaining after the loop
|
beq t4, zero, L(last16) # word remaining after the loop
|
||||||
PTR_SUBU a2, t4
|
PTR_SUBU a2, t4
|
||||||
sd a1, 0(a0) # Yes...
|
sd a1, 0(a0) # Yes...
|
||||||
PTR_ADDIU a0, 8
|
PTR_ADDIU a0, 8
|
||||||
|
|
||||||
L(last8):
|
L(last16):
|
||||||
blez a2, L(exit) # Handle last 8 bytes (if cnt>0)
|
blez a2, L(exit) # Handle last 16 bytes (if cnt>0)
|
||||||
PTR_ADDU a3, a2, a0 # a3 is last address +1
|
PTR_ADDU a3, a2, a0 # a3 is last address +1
|
||||||
L(lst8l):
|
L(lst16l):
|
||||||
PTR_ADDIU a0, 1
|
PTR_ADDIU a0, 1
|
||||||
bne a0, a3, L(lst8l)
|
bne a0, a3, L(lst16l)
|
||||||
sb a1, -1(a0)
|
sb a1, -1(a0)
|
||||||
L(exit):
|
L(exit):
|
||||||
j ra # Bye, bye
|
j ra # Bye, bye
|
||||||
|
Loading…
Reference in New Issue
Block a user