mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-22 04:50:07 +00:00
S390: Optimize __memset_z196.
It turned out that a 256-byte mvc instruction that depends on the result of a previous 256-byte mvc instruction is counterproductive. Therefore, this patch adjusts the 256-byte loop so that it stores the first byte with stc and sets the remaining 255 bytes with mvc. The 255-byte mvc instruction now depends on the stc instruction rather than on a preceding mvc.
This commit is contained in:
parent
0792c8ae1a
commit
1d21fb1061
@ -157,28 +157,27 @@ ENTRY(MEMSET_Z196)
|
||||
# if !defined __s390x__
|
||||
llgfr %r4,%r4
|
||||
# endif /* !defined __s390x__ */
|
||||
ltgr %r4,%r4
|
||||
je .L_Z196_4
|
||||
clgfi %r4,1
|
||||
jl .L_Z196_4 # n == 0
|
||||
stc %r3,0(%r2)
|
||||
je .L_Z196_4 # n == 1
|
||||
aghi %r4,-2
|
||||
lgr %r1,%r2
|
||||
cghi %r4,1
|
||||
je .L_Z196_4
|
||||
aghi %r4,-2
|
||||
srlg %r5,%r4,8
|
||||
ltgr %r5,%r5
|
||||
jne .L_Z196_1
|
||||
risbg %r5,%r4,8,128+63,56 # r5 = n / 256
|
||||
jne .L_Z196_1 # Jump away if r5 != 0
|
||||
.L_Z196_3:
|
||||
exrl %r4,.L_Z196_17
|
||||
.L_Z196_4:
|
||||
br %r14
|
||||
.L_Z196_1:
|
||||
cgfi %r5,1048576
|
||||
jh __memset_mvcle # Switch to mvcle for >256MB
|
||||
jh __memset_mvcle # Switch to mvcle for >256MB
|
||||
.L_Z196_2:
|
||||
pfd 2,1024(%r1)
|
||||
mvc 1(256,%r1),0(%r1)
|
||||
mvc 1(255,%r1),0(%r1)
|
||||
aghi %r5,-1
|
||||
la %r1,256(%r1)
|
||||
stc %r3,0(%r1)
|
||||
jne .L_Z196_2
|
||||
j .L_Z196_3
|
||||
.L_Z196_17:
|
||||
|
Loading…
Reference in New Issue
Block a user