glibc/sysdeps/x86_64/memset.S
H.J. Lu 5f92ec52e7 Replace %xmm8 with %xmm0
Since ld.so preserves vector registers now, we can use %xmm0 to avoid
the REX prefix.

	* sysdeps/x86_64/memset.S: Replace %xmm8 with %xmm0.
2015-08-25 08:48:34 -07:00

133 lines
3.1 KiB
ArmAsm

/* memset/bzero -- set memory area to CH/0
Optimized version for x86-64.
Copyright (C) 2002-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
.text
#if IS_IN (libc)
ENTRY(__bzero)
movq %rdi, %rax /* Set return value. */
movq %rsi, %rdx /* Set n. */
pxor %xmm0, %xmm0
jmp L(entry_from_bzero)
END(__bzero)
weak_alias (__bzero, bzero)
/* Like memset but takes additional parameter with return value. */
ENTRY(__memset_tail)
movq %rcx, %rax /* Set return value. */
movd %esi, %xmm0
punpcklbw %xmm0, %xmm0
punpcklwd %xmm0, %xmm0
pshufd $0, %xmm0, %xmm0
jmp L(entry_from_bzero)
END(__memset_tail)
#endif
#if defined PIC && IS_IN (libc)
ENTRY_CHK (__memset_chk)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END_CHK (__memset_chk)
#endif
ENTRY (memset)
movd %esi, %xmm0
movq %rdi, %rax
punpcklbw %xmm0, %xmm0
punpcklwd %xmm0, %xmm0
pshufd $0, %xmm0, %xmm0
L(entry_from_bzero):
cmpq $64, %rdx
ja L(loop_start)
cmpq $16, %rdx
jbe L(less_16_bytes)
cmpq $32, %rdx
movdqu %xmm0, (%rdi)
movdqu %xmm0, -16(%rdi,%rdx)
ja L(between_32_64_bytes)
L(return):
rep
ret
.p2align 4
L(between_32_64_bytes):
movdqu %xmm0, 16(%rdi)
movdqu %xmm0, -32(%rdi,%rdx)
ret
.p2align 4
L(loop_start):
leaq 64(%rdi), %rcx
movdqu %xmm0, (%rdi)
andq $-64, %rcx
movdqu %xmm0, -16(%rdi,%rdx)
movdqu %xmm0, 16(%rdi)
movdqu %xmm0, -32(%rdi,%rdx)
movdqu %xmm0, 32(%rdi)
movdqu %xmm0, -48(%rdi,%rdx)
movdqu %xmm0, 48(%rdi)
movdqu %xmm0, -64(%rdi,%rdx)
addq %rdi, %rdx
andq $-64, %rdx
cmpq %rdx, %rcx
je L(return)
.p2align 4
L(loop):
movdqa %xmm0, (%rcx)
movdqa %xmm0, 16(%rcx)
movdqa %xmm0, 32(%rcx)
movdqa %xmm0, 48(%rcx)
addq $64, %rcx
cmpq %rcx, %rdx
jne L(loop)
rep
ret
L(less_16_bytes):
movq %xmm0, %rcx
testb $24, %dl
jne L(between8_16bytes)
testb $4, %dl
jne L(between4_7bytes)
testb $1, %dl
je L(odd_byte)
movb %cl, (%rdi)
L(odd_byte):
testb $2, %dl
je L(return)
movw %cx, -2(%rax,%rdx)
ret
L(between4_7bytes):
movl %ecx, (%rdi)
movl %ecx, -4(%rdi,%rdx)
ret
L(between8_16bytes):
movq %rcx, (%rdi)
movq %rcx, -8(%rdi,%rdx)
ret
END (memset)
libc_hidden_builtin_def (memset)
#if defined PIC && IS_IN (libc) && !defined USE_MULTIARCH
strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
.section .gnu.warning.__memset_zero_constant_len_parameter
.string "memset used with constant zero length parameter; this could be due to transposed parameters"
#endif