mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-07 10:00:07 +00:00
cdd927d98c
This patch introduces a z13 specific ifunc variant for memmove. As the common code implementation, it checks if we can copy from the beginning to the end - with z196 memcpy implementation - or if we have to copy from the end to the beginning. The latter case is done by using vector load/store instructions. If vector instructions are not available, the common-code is used as fallback. Therefore it is implemented in memmove-c with a different name. Furthermore the ifunc logic decides if we need the common-code implementation at all. If vector instructions are supported due to the minimum architecture level set we can skip the common-code ifunc variant. ChangeLog: * sysdeps/s390/Makefile (sysdep_routines): Add memmove-c. * sysdeps/s390/ifunc-memcpy.h (HAVE_MEMMOVE_IFUNC, HAVE_MEMMOVE_IFUNC_AND_VX_SUPPORT, MEMMOVE_DEFAULT, HAVE_MEMMOVE_C, MEMMOVE_C, HAVE_MEMMOVE_Z13, MEMMOVE_Z13): New defines. * sysdeps/s390/memcpy-z900.S: Add z13 memmove implementation. * sysdeps/s390/memmove-c.c: New file. * sysdeps/s390/memmove.c: Likewise. * sysdeps/s390/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Add ifunc variants for memmove.
312 lines
8.2 KiB
ArmAsm
312 lines
8.2 KiB
ArmAsm
/* memcpy - copy a block from source to destination. 31/64 bit S/390 version.
|
|
Copyright (C) 2012-2018 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
|
|
#include <sysdep.h>
|
|
#include "asm-syntax.h"
|
|
#include <ifunc-memcpy.h>
|
|
|
|
/* INPUT PARAMETERS
|
|
%r2 = address of destination memory area
|
|
%r3 = address of source memory area
|
|
%r4 = number of bytes to copy. */
|
|
|
|
.text
|
|
|
|
/* Width-dependent mnemonics.  The z900/g5 code and __memcpy_mvcle are
   assembled for both ABIs: on s390x the 64-bit instruction forms are
   selected, otherwise their 32-bit counterparts.  */
#ifdef __s390x__
# define LTGR	ltgr
# define CGHI	cghi
# define LGR	lgr
# define AGHI	aghi
# define BRCTG	brctg
#else
# define LTGR	ltr
# define CGHI	chi
# define LGR	lr
# define AGHI	ahi
# define BRCTG	brct
#endif /* ! __s390x__  */
|
|
|
|
#if HAVE_MEMCPY_Z900_G5
/* mempcpy, z900 (64 bit) / g5 (31 bit) variant.
   Performs the same copy as MEMCPY_Z900_G5 below, but sets the return
   register %r2 to dest + n before joining the shared copy code.  */
ENTRY(MEMPCPY_Z900_G5)
# ifdef __s390x__
	.machine "z900"
# else
	.machine "g5"
# endif /* ! __s390x__  */
	LGR	%r1,%r2			/* %r1 = working destination.  */
	la	%r2,0(%r4,%r2)		/* %r2 = dest + n (return value).  */
	j	.L_Z900_G5_start
END(MEMPCPY_Z900_G5)

/* memcpy, z900 (64 bit) / g5 (31 bit) variant.
   Register usage in the copy code:
     %r1 = working destination	%r2 = return value (not modified)
     %r3 = working source	%r4 = n - 1
     %r5 = count of 256-byte blocks, later base address for EX.  */
ENTRY(MEMCPY_Z900_G5)
# ifdef __s390x__
	.machine "z900"
# else
	.machine "g5"
# endif /* ! __s390x__  */
	LGR	%r1,%r2			/* %r1 = working destination.  */
.L_Z900_G5_start:
	LTGR	%r4,%r4			/* n == 0: nothing to copy.  */
	je	.L_Z900_G5_done
	AGHI	%r4,-1			/* %r4 = n - 1; its low byte is the
					   length code for the final mvc.  */
# ifdef __s390x__
	srlg	%r5,%r4,8		/* %r5 = (n - 1) / 256.  */
# else
	lr	%r5,%r4
	srl	%r5,8			/* %r5 = (n - 1) / 256.  */
# endif /* ! __s390x__  */
	LTGR	%r5,%r5
	jne	.L_Z900_G5_blocks	/* At least one full 256-byte block.  */
.L_Z900_G5_tail:
	/* Copy the remaining bytes by executing the one-byte mvc template
	   with the low byte of %r4 supplied as its length code.  %r5 is
	   loaded with the address the EX displacement is relative to.  */
# ifdef __s390x__
	larl	%r5,.L_Z900_G5_mvc
#  define Z900_G5_EX_D 0
# else
	basr	%r5,0			/* %r5 = address of the next label.  */
.L_Z900_G5_base:
#  define Z900_G5_EX_D .L_Z900_G5_mvc-.L_Z900_G5_base
# endif /* ! __s390x__  */
	ex	%r4,Z900_G5_EX_D(%r5)
.L_Z900_G5_done:
	br	%r14
.L_Z900_G5_blocks:
	/* More than 4096 blocks of 256 bytes (> 1MB): hand over to the
	   mvcle based copy.  */
	CGHI	%r5,4096
	jh	__memcpy_mvcle
.L_Z900_G5_loop:
	/* Copy one 256-byte block per iteration, %r5 blocks in total.  */
	mvc	0(256,%r1),0(%r3)
	la	%r1,256(%r1)
	la	%r3,256(%r3)
	BRCTG	%r5,.L_Z900_G5_loop
	j	.L_Z900_G5_tail
.L_Z900_G5_mvc:
	/* Template for the EX above; never reached by fall-through.  */
	mvc	0(1,%r1),0(%r3)
END(MEMCPY_Z900_G5)
#endif /* HAVE_MEMCPY_Z900_G5  */
|
|
|
|
/* Copy helper based on MVCLE, entered by a jump from the mem[p]cpy
   variants in this file once the block count exceeds their threshold.

   This is not usable as a standalone function: it expects the state
   left behind by those variants, i.e.
     %r1 = destination		%r2 = value to return (dest [+ n])
     %r3 = source		%r4 = n - 1
   and therefore adds 1 to the length before copying.  */
ENTRY(__memcpy_mvcle)
	LGR	%r0,%r2			/* Save the return value.  */
	AGHI	%r4,1			/* Undo the earlier decrement: n.  */
	/* Set up the even/odd register pairs MVCLE operates on:
	   %r2/%r3 = destination address/length,
	   %r4/%r5 = source address/length.  */
	LGR	%r5,%r4			/* Source length.  */
	LGR	%r4,%r3			/* Source address.  */
	LGR	%r2,%r1			/* Destination address.  */
	LGR	%r3,%r5			/* Destination length = source length.  */
.L_MVCLE_retry:
	mvcle	%r2,%r4,0
	jo	.L_MVCLE_retry		/* Re-issue until the copy is complete.  */
	LGR	%r2,%r0			/* Restore the return value.  */
	br	%r14
END(__memcpy_mvcle)
|
|
|
|
/* The width-dependent mnemonic macros are not used below this point;
   the remaining variants are written with the 64-bit mnemonics
   directly.  */
#undef LTGR
#undef CGHI
#undef LGR
#undef AGHI
#undef BRCTG
|
|
|
|
#if HAVE_MEMCPY_Z10
/* mempcpy, z10 variant.
   Performs the same copy as MEMCPY_Z10 below, but sets the return
   register %r2 to dest + n before joining the shared copy code.  */
ENTRY(MEMPCPY_Z10)
	.machine "z10"
	.machinemode "zarch_nohighgprs"
	lgr	%r1,%r2			/* %r1 = working destination.  */
	la	%r2,0(%r4,%r2)		/* %r2 = dest + n (return value).  */
	j	.L_Z10_start
END(MEMPCPY_Z10)

/* memcpy, z10 variant.
   Register usage in the copy code:
     %r1 = working destination	%r2 = return value (not modified)
     %r3 = working source	%r4 = n - 1
     %r5 = count of 256-byte blocks.  */
ENTRY(MEMCPY_Z10)
	.machine "z10"
	.machinemode "zarch_nohighgprs"
	lgr	%r1,%r2			/* %r1 = working destination.  */
.L_Z10_start:
# if !defined __s390x__
	llgfr	%r4,%r4			/* Zero-extend the 32-bit length.  */
# endif /* !defined __s390x__  */
	cgije	%r4,0,.L_Z10_done	/* n == 0: nothing to copy.  */
	aghi	%r4,-1			/* %r4 = n - 1; its low byte is the
					   length code for the final mvc.  */
	srlg	%r5,%r4,8		/* %r5 = (n - 1) / 256.  */
	cgijlh	%r5,0,.L_Z10_blocks	/* At least one full 256-byte block.  */
.L_Z10_tail:
	/* Copy the remaining bytes by executing the one-byte mvc template
	   with the low byte of %r4 supplied as its length code.  */
	exrl	%r4,.L_Z10_mvc
.L_Z10_done:
	br	%r14
.L_Z10_blocks:
	/* More than 65535 blocks of 256 bytes (> 16MB): hand over to the
	   mvcle based copy.  */
	cgfi	%r5,65535
	jh	__memcpy_mvcle
.L_Z10_loop:
	/* Copy one 256-byte block per iteration, %r5 blocks in total,
	   prefetching 768 bytes ahead on both sides.  */
	pfd	1,768(%r3)		/* Prefetch source for fetch access.  */
	pfd	2,768(%r1)		/* Prefetch dest for store access.  */
	mvc	0(256,%r1),0(%r3)
	la	%r1,256(%r1)
	la	%r3,256(%r3)
	brctg	%r5,.L_Z10_loop
	j	.L_Z10_tail
.L_Z10_mvc:
	/* Template for the EXRL above; never reached by fall-through.  */
	mvc	0(1,%r1),0(%r3)
END(MEMCPY_Z10)
#endif /* HAVE_MEMCPY_Z10  */
|
|
|
|
#if HAVE_MEMCPY_Z196
/* mempcpy, z196 variant.
   Performs the same copy as MEMCPY_Z196 below, but sets the return
   register %r2 to dest + n before joining the shared copy code.  */
ENTRY(MEMPCPY_Z196)
	.machine "z196"
	.machinemode "zarch_nohighgprs"
	lgr	%r1,%r2			/* %r1 = working destination.  */
	la	%r2,0(%r4,%r2)		/* %r2 = dest + n (return value).  */
	j	.L_Z196_start
END(MEMPCPY_Z196)

/* memcpy, z196 variant.
   Register usage in the copy code:
     %r1 = working destination	%r2 = return value (not modified)
     %r3 = working source	%r4 = n - 1
     %r5 = count of 256-byte blocks.
   .L_Z196_start2 is a second entry point that skips the length
   extension and the zero-length check; the z13 memmove in this file
   branches to it for the forward-copy case.  */
ENTRY(MEMCPY_Z196)
	.machine "z196"
	.machinemode "zarch_nohighgprs"
	lgr	%r1,%r2			/* %r1 = working destination.  */
.L_Z196_start:
# if !defined __s390x__
	llgfr	%r4,%r4			/* Zero-extend the 32-bit length.  */
# endif /* !defined __s390x__  */
	ltgr	%r4,%r4			/* n == 0: nothing to copy.  */
	je	.L_Z196_done
.L_Z196_start2:
	aghi	%r4,-1			/* %r4 = n - 1; its low byte is the
					   length code for the final mvc.  */
	srlg	%r5,%r4,8		/* %r5 = (n - 1) / 256.  */
	ltgr	%r5,%r5
	jne	.L_Z196_blocks		/* At least one full 256-byte block.  */
.L_Z196_tail:
	/* Copy the remaining bytes by executing the one-byte mvc template
	   with the low byte of %r4 supplied as its length code.  */
	exrl	%r4,.L_Z196_mvc
.L_Z196_done:
	br	%r14
.L_Z196_blocks:
	/* More than 262144 blocks of 256 bytes (> 64MB): hand over to the
	   mvcle based copy.  */
	cgfi	%r5,262144
	jh	__memcpy_mvcle
.L_Z196_loop:
	/* Copy one 256-byte block per iteration, %r5 blocks in total,
	   prefetching 768 bytes ahead on both sides.  */
	pfd	1,768(%r3)		/* Prefetch source for fetch access.  */
	pfd	2,768(%r1)		/* Prefetch dest for store access.  */
	mvc	0(256,%r1),0(%r3)
	aghi	%r5,-1			/* Sets the condition code tested by
					   the jne below; the two la in
					   between leave it unchanged.  */
	la	%r1,256(%r1)
	la	%r3,256(%r3)
	jne	.L_Z196_loop
	j	.L_Z196_tail
.L_Z196_mvc:
	/* Template for the EXRL above; never reached by fall-through.  */
	mvc	0(1,%r1),0(%r3)
END(MEMCPY_Z196)
#endif /* HAVE_MEMCPY_Z196  */
|
|
|
|
#if HAVE_MEMMOVE_Z13
/* memmove, z13 variant (uses vector load/store instructions).
   In:  %r2 = destination, %r3 = source, %r4 = number of bytes.
   Out: %r2 = destination.

   - n <= 16: one vll/vstl pair; all bytes are loaded before any byte
     is stored.
   - n > 16 and (dst - src) >= n as unsigned values: a forward copy is
     possible, so continue in the z196 memcpy at .L_Z196_start2.
   - otherwise: copy backwards from the end in 64-byte and 16-byte
     steps and finish the first <= 16 bytes with vll/vstl.

   In the backward path %r4 is the offset, relative to both %r3 and
   %r2, of the 16-byte block that is copied next.  */
ENTRY(MEMMOVE_Z13)
	.machine "z13"
	.machinemode "zarch_nohighgprs"
# if !defined __s390x__
	/* 31 bit: clear the upper halves so that length and both pointers
	   are valid 64-bit values.  */
	llgfr	%r4,%r4
	llgfr	%r3,%r3
	llgfr	%r2,%r2
# endif /* !defined __s390x__  */
	sgrk	%r0,%r2,%r3		/* %r0 = dst - src.  */
	clgijh	%r4,16,.L_MEMMOVE_Z13_LARGE
	aghik	%r5,%r4,-1		/* %r5 = n - 1 = highest byte index;
					   negative if n == 0.  */
.L_MEMMOVE_Z13_SMALL:
	/* Expects %r5 = remaining bytes - 1 and the condition code of the
	   instruction that computed it.  */
	jl	.L_MEMMOVE_Z13_END	/* Nothing left to copy.  */
	/* vll/vstl take the highest byte index rather than a length.  */
	vll	%v16,%r5,0(%r3)
	vstl	%v16,%r5,0(%r2)
.L_MEMMOVE_Z13_END:
	br	%r14
.L_MEMMOVE_Z13_LARGE:
	lgr	%r1,%r2			/* As memcpy expects it: %r1 = working
					   destination, %r2 = return value.  */
# if ! HAVE_MEMCPY_Z196
#  error The z13 variant of memmove needs the z196 variant of memcpy!
# endif
	/* Unsigned (dst - src) >= n: the forward copy of memcpy is safe.  */
	clgrjhe	%r0,%r4,.L_Z196_start2
	risbgn	%r5,%r4,4,128+63,60	/* %r5 = n / 16 = number of complete
					   16-byte blocks.  */
	aghi	%r4,-16			/* %r4 = offset of the last block.  */
	clgijhe	%r5,8,.L_MEMMOVE_Z13_LARGE_64B
.L_MEMMOVE_Z13_LARGE_16B_LOOP:
	/* Copy %r5 blocks of 16 bytes, walking down from offset %r4.  */
	vl	%v16,0(%r4,%r3)
	vst	%v16,0(%r4,%r2)
	aghi	%r4,-16
	brctg	%r5,.L_MEMMOVE_Z13_LARGE_16B_LOOP
	aghik	%r5,%r4,15		/* %r5 = remaining bytes - 1.  */
	j	.L_MEMMOVE_Z13_SMALL
.L_MEMMOVE_Z13_LARGE_64B:
	/* At least 8 blocks of 16 bytes (>= 128 bytes): copy 64 bytes per
	   iteration first.  */
	aghi	%r4,-48			/* %r4 = offset of the last 64-byte
					   chunk.  */
	srlg	%r0,%r5,2		/* %r0 = %r5 / 4
					   => Number of 64byte blocks.  */
.L_MEMMOVE_Z13_LARGE_64B_LOOP:
	/* Load the complete chunk before storing any part of it.  */
	vl	%v20,48(%r4,%r3)
	vl	%v19,32(%r4,%r3)
	vl	%v18,16(%r4,%r3)
	vl	%v17,0(%r4,%r3)
	vst	%v20,48(%r4,%r2)
	vst	%v19,32(%r4,%r2)
	vst	%v18,16(%r4,%r2)
	vst	%v17,0(%r4,%r2)
	aghi	%r4,-64
	brctg	%r0,.L_MEMMOVE_Z13_LARGE_64B_LOOP
	aghi	%r4,48			/* Back to 16-byte block offsets.  */
	risbg	%r5,%r5,62,128+63,0	/* %r5 = %r5 & 3
					   => Remaining 16byte blocks; also
					   sets the condition code.  */
	jne	.L_MEMMOVE_Z13_LARGE_16B_LOOP
	aghik	%r5,%r4,15		/* %r5 = remaining bytes - 1.  */
	j	.L_MEMMOVE_Z13_SMALL
END(MEMMOVE_Z13)
#endif /* HAVE_MEMMOVE_Z13  */
|
|
|
|
#if ! HAVE_MEMCPY_IFUNC
/* Without ifunc the public mem[p]cpy symbols are plain aliases of the
   default variants from this file.  With ifunc they are defined in
   sysdeps/s390/mem[p]cpy.c instead.  */
strong_alias (MEMCPY_DEFAULT, memcpy)
strong_alias (MEMPCPY_DEFAULT, __mempcpy)
weak_alias (__mempcpy, mempcpy)
#endif /* ! HAVE_MEMCPY_IFUNC  */
|
|
|
|
#if ! HAVE_MEMMOVE_IFUNC
/* Without ifunc the public memmove symbol has to be defined directly.
   With ifunc it is defined in sysdeps/s390/memmove.c instead.  */
# if ! HAVE_MEMMOVE_C
/* When the C variant is needed, sysdeps/s390/memmove-c.c defines
   memmove.  Otherwise MEMMOVE_DEFAULT is implemented in this file and
   the alias is created here.  */
strong_alias (MEMMOVE_DEFAULT, memmove)
# endif /* ! HAVE_MEMMOVE_C  */
#endif /* ! HAVE_MEMMOVE_IFUNC  */
|
|
|
|
#if defined SHARED && IS_IN (libc)
/* Internal (__GI_*) symbols for use inside the shared libc.
   Compare to libc_hidden_[builtin_]def (mem[p]cpy) in
   string/mem[p]cpy.c.  */
strong_alias (MEMCPY_DEFAULT, __GI_memcpy)
strong_alias (MEMPCPY_DEFAULT, __GI_mempcpy)
strong_alias (MEMPCPY_DEFAULT, __GI___mempcpy)
# if ! HAVE_MEMMOVE_C
/* When the C variant is needed, sysdeps/s390/memmove-c.c defines the
   internal symbol.  Otherwise MEMMOVE_DEFAULT is implemented in this
   file and the alias is created here.  */
strong_alias (MEMMOVE_DEFAULT, __GI_memmove)
# endif /* ! HAVE_MEMMOVE_C  */
#endif /* SHARED && IS_IN (libc)  */
|