glibc/sysdeps/x86_64/multiarch/strcpy-ssse3.S
Siddhesh Poyarekar 30891f35fa Remove "Contributed by" lines
We stopped adding "Contributed by" or similar lines in sources in 2012
in favour of git logs and keeping the Contributors section of the
glibc manual up to date.  Removing these lines makes the license
header a bit more consistent across files and also removes the
possibility of error in attribution when license blocks or files are
copied across since the contributed-by lines don't actually reflect
reality in those cases.

Move all "Contributed by" and similar lines (Written by, Test by,
etc.) into a new file CONTRIBUTED-BY to retain record of these
contributions.  These contributors are also mentioned in
manual/contrib.texi, so we just maintain this additional record as a
courtesy to the earlier developers.

The following scripts were used to filter a list of files to edit in
place and to clean up the CONTRIBUTED-BY file respectively.  These
were not added to the glibc sources because they're not expected to be
of any use in future given that this is a one time task:

https://gist.github.com/siddhesh/b5ecac94eabfd72ed2916d6d8157e7dc
https://gist.github.com/siddhesh/15ea1f5e435ace9774f485030695ee02

Reviewed-by: Carlos O'Donell <carlos@redhat.com>
2021-09-03 22:06:44 +05:30

3551 lines
62 KiB
ArmAsm

/* strcpy with SSSE3
Copyright (C) 2011-2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
# ifndef USE_AS_STRCAT
# include <sysdep.h>
# ifndef STRCPY
# define STRCPY __strcpy_ssse3
# endif
.section .text.ssse3,"ax",@progbits
/* STRCPY entry point.  This prologue sits inside the
   "ifndef USE_AS_STRCAT" conditional, so it is only assembled when
   USE_AS_STRCAT is not defined.
   Register roles established here:
     %rcx = source pointer (copied from %rsi)
     %rdx = destination pointer (copied from %rdi; %rdi itself is untouched)
     %r8  = byte count, only under USE_AS_STRNCPY (copied from %RDX_LP)
   The first 16 source bytes are then tested one at a time.  A NUL at
   index k branches to L(Exit<k+1>); those labels are defined outside this
   part of the file.  */
ENTRY (STRCPY)
mov %rsi, %rcx
# ifdef USE_AS_STRNCPY
mov %RDX_LP, %R8_LP
# endif
mov %rdi, %rdx
# ifdef USE_AS_STRNCPY
/* Count of zero leaves through L(Exit0); counts of 1..8 are handled by
   L(StrncpyExit8Bytes) before any source byte is read here.  */
test %R8_LP, %R8_LP
jz L(Exit0)
cmp $8, %R8_LP
jbe L(StrncpyExit8Bytes)
# endif
/* Source bytes 0..7.  */
cmpb $0, (%rcx)
jz L(Exit1)
cmpb $0, 1(%rcx)
jz L(Exit2)
cmpb $0, 2(%rcx)
jz L(Exit3)
cmpb $0, 3(%rcx)
jz L(Exit4)
cmpb $0, 4(%rcx)
jz L(Exit5)
cmpb $0, 5(%rcx)
jz L(Exit6)
cmpb $0, 6(%rcx)
jz L(Exit7)
cmpb $0, 7(%rcx)
jz L(Exit8)
# ifdef USE_AS_STRNCPY
/* No NUL in the first 8 bytes: counts below 16 (unsigned compare) leave
   through L(StrncpyExit15Bytes).  */
cmp $16, %r8
jb L(StrncpyExit15Bytes)
# endif
/* Source bytes 8..14.  */
cmpb $0, 8(%rcx)
jz L(Exit9)
cmpb $0, 9(%rcx)
jz L(Exit10)
cmpb $0, 10(%rcx)
jz L(Exit11)
cmpb $0, 11(%rcx)
jz L(Exit12)
cmpb $0, 12(%rcx)
jz L(Exit13)
cmpb $0, 13(%rcx)
jz L(Exit14)
cmpb $0, 14(%rcx)
jz L(Exit15)
# ifdef USE_AS_STRNCPY
/* A count of exactly 16 goes to L(Exit16) without looking at byte 15.  */
cmp $16, %r8
je L(Exit16)
# endif
cmpb $0, 15(%rcx)
jz L(Exit16)
# endif
/* Continuation after the byte-wise prologue.  This part lies outside the
   USE_AS_STRCAT conditional.  It expects %rcx = source and
   %rdx = destination (and %r8 = count under USE_AS_STRNCPY).
   Steps:
     1. copy the first 16 source bytes with two 8-byte moves through %r9;
     2. test the first 16-byte-aligned source block above %rcx for a NUL;
     3. round %rdx up to the next 16-byte boundary and advance %rcx by the
        same distance;
     4. dispatch on (%rcx & 15) to L(Align16Both) or one of L(Shl1..Shl15).  */
# ifdef USE_AS_STRNCPY
mov %rcx, %rsi
sub $16, %r8
and $0xf, %rsi
/* Net effect of this group: %r8 = %r8 - 16 + (%rcx & 15).  */
add %rsi, %r8
# endif
/* %rsi = first 16-byte-aligned address strictly above %rcx.  */
lea 16(%rcx), %rsi
and $-16, %rsi
/* %xmm0 = 0; it serves as the NUL comparand from here on.  */
pxor %xmm0, %xmm0
mov (%rcx), %r9
mov %r9, (%rdx)
pcmpeqb (%rsi), %xmm0
mov 8(%rcx), %r9
mov %r9, 8(%rdx)
/* convert byte mask in xmm0 to bit mask */
pmovmskb %xmm0, %rax
/* %rsi becomes the distance (1..16) from %rcx to that aligned block.  */
sub %rcx, %rsi
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
/* A NUL in the aligned block: L(CopyFrom1To16Bytes) (defined outside this
   chunk) takes over with the mask in %rax and the block offset in %rsi.  */
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
/* Round %rdx up to the next 16-byte boundary.  %rax = old %rdx - new %rdx,
   a value in -16..-1.  */
mov %rdx, %rax
lea 16(%rdx), %rdx
and $-16, %rdx
sub %rdx, %rax
# ifdef USE_AS_STRNCPY
/* %rsi = (source distance) - (destination distance) - 1.  Only bit 31 is
   kept; writing %esi clears the upper half of %rsi.  When the bit is
   clear, i.e. the source distance is larger than the destination
   distance, 16 is added back to %r8.  */
add %rax, %rsi
lea -1(%rsi), %rsi
and $1<<31, %esi
test %rsi, %rsi
jnz L(ContinueCopy)
lea 16(%r8), %r8
L(ContinueCopy):
# endif
/* Advance %rcx by the same distance %rdx moved, then take its offset
   within a 16-byte block.  */
sub %rax, %rcx
mov %rcx, %rax
and $0xf, %rax
/* mov rather than xor: the flags produced by the "and" above must survive
   until the jz below.  */
mov $0, %rsi
/* case: rcx_offset == rdx_offset */
jz L(Align16Both)
/* Offsets 8..15 are decoded at L(ShlHigh8); the flags of this compare are
   still live there.  */
cmp $8, %rax
jae L(ShlHigh8)
cmp $1, %rax
je L(Shl1)
cmp $2, %rax
je L(Shl2)
cmp $3, %rax
je L(Shl3)
cmp $4, %rax
je L(Shl4)
cmp $5, %rax
je L(Shl5)
cmp $6, %rax
je L(Shl6)
/* Only offset 7 is left.  */
jmp L(Shl7)
/* L(ShlHigh8): second half of the dispatch on %rax = (%rcx & 15), reached
   by "jae" after "cmp $8, %rax".  No flag-writing instruction lies between
   that compare and this label, so the first "je" below still tests
   %rax == 8.  */
L(ShlHigh8):
je L(Shl8)
cmp $9, %rax
je L(Shl9)
cmp $10, %rax
je L(Shl10)
cmp $11, %rax
je L(Shl11)
cmp $12, %rax
je L(Shl12)
cmp $13, %rax
je L(Shl13)
cmp $14, %rax
je L(Shl14)
/* Only offset 15 is left.  */
jmp L(Shl15)
/* L(Align16Both): taken when (%rcx & 15) == 0 after %rdx has been rounded
   up to a 16-byte boundary, so both pointers are 16-byte aligned and
   movaps can be used for loads and stores.
   %rsi is the running byte offset applied to both pointers (0 on entry).
   %xmm0 is all-zero on entry and remains zero on every fall-through,
   because a non-zero pcmpeqb mask always branches away.
   Each step loads the next 16 source bytes, stores the previously loaded
   block, and tests the new block for a NUL.  Under USE_AS_STRNCPY each
   step also subtracts 16 from %r8 and leaves on borrow or zero.  */
L(Align16Both):
movaps (%rcx), %xmm1
movaps 16(%rcx), %xmm2
movaps %xmm1, (%rdx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
movaps 16(%rcx, %rsi), %xmm3
movaps %xmm2, (%rdx, %rsi)
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
movaps 16(%rcx, %rsi), %xmm4
movaps %xmm3, (%rdx, %rsi)
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
movaps 16(%rcx, %rsi), %xmm1
movaps %xmm4, (%rdx, %rsi)
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
movaps 16(%rcx, %rsi), %xmm2
movaps %xmm1, (%rdx, %rsi)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
movaps 16(%rcx, %rsi), %xmm3
movaps %xmm2, (%rdx, %rsi)
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
/* No NUL so far: store the last loaded block, then round the next source
   address (16 + %rcx + %rsi) down to a 64-byte boundary.  %rax receives
   old %rcx - new %rcx and %rdx is moved by the same amount, so both
   pointers stay in step for the 64-byte loop that follows.  */
movaps %xmm3, (%rdx, %rsi)
mov %rcx, %rax
lea 16(%rcx, %rsi), %rcx
and $-0x40, %rcx
sub %rcx, %rax
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
/* %r8 = %r8 + %rax + 112.  */
lea 112(%r8, %rax), %r8
# endif
/* The 64-byte loop advances %rcx and %rdx before it tests for a NUL, so
   block offsets relative to them start at -64.  */
mov $-0x40, %rsi
/* L(Aligned64Loop): main loop for the equally-aligned case; it handles 64
   source bytes per iteration using aligned loads from (%rcx).
   %xmm4..%xmm7 keep the four raw 16-byte blocks.  %xmm2 and %xmm3 are
   folded with pminub into a byte-wise minimum of all four; a zero byte in
   any block yields a zero byte in that minimum, which pcmpeqb against the
   all-zero %xmm0 turns into a mask bit in %rax.
   %rcx and %rdx are advanced by 64 before the test, hence the negative
   store offsets below.  */
.p2align 4
L(Aligned64Loop):
movaps (%rcx), %xmm2
movaps %xmm2, %xmm4
movaps 16(%rcx), %xmm5
movaps 32(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 48(%rcx), %xmm7
pminub %xmm5, %xmm2
pminub %xmm7, %xmm3
pminub %xmm2, %xmm3
pcmpeqb %xmm0, %xmm3
pmovmskb %xmm3, %rax
lea 64(%rdx), %rdx
lea 64(%rcx), %rcx
# ifdef USE_AS_STRNCPY
/* Count exhausted within these 64 bytes (borrow or zero).  */
sub $64, %r8
jbe L(StrncpyLeaveCase2OrCase3)
# endif
/* A NUL somewhere in the 64 bytes: L(Aligned64Leave) finds the block.  */
test %rax, %rax
jnz L(Aligned64Leave)
movaps %xmm4, -64(%rdx)
movaps %xmm5, -48(%rdx)
movaps %xmm6, -32(%rdx)
movaps %xmm7, -16(%rdx)
jmp L(Aligned64Loop)
/* L(Aligned64Leave): entered from L(Aligned64Loop) when the 64 bytes held
   in %xmm4..%xmm7 contain a NUL.  The blocks are tested in order; every
   block proven NUL-free is stored and %rsi is advanced by 16.  %rsi
   presumably still holds the -0x40 loaded before the loop, since the loop
   itself never writes it.  The final hand-off goes to
   L(CopyFrom1To16Bytes), which is defined outside this chunk.
   %xmm0 is all-zero on entry because the loop only ever wrote its compare
   result into %xmm3.  */
L(Aligned64Leave):
# ifdef USE_AS_STRNCPY
/* Give back 48 of the 64 bytes the loop subtracted; 16 are then taken off
   again for every further block examined below.  */
lea 48(%r8), %r8
# endif
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm0, %rax
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
pcmpeqb %xmm5, %xmm0
# ifdef USE_AS_STRNCPY
lea -16(%r8), %r8
# endif
pmovmskb %xmm0, %rax
movaps %xmm4, -64(%rdx)
test %rax, %rax
/* lea does not modify flags, so the jnz still acts on the test above.  */
lea 16(%rsi), %rsi
jnz L(CopyFrom1To16Bytes)
pcmpeqb %xmm6, %xmm0
# ifdef USE_AS_STRNCPY
lea -16(%r8), %r8
# endif
pmovmskb %xmm0, %rax
movaps %xmm5, -48(%rdx)
test %rax, %rax
lea 16(%rsi), %rsi
jnz L(CopyFrom1To16Bytes)
/* The first three blocks are clean, so the NUL must be in %xmm7: no test
   is needed and the jump is unconditional.  */
movaps %xmm6, -32(%rdx)
pcmpeqb %xmm7, %xmm0
# ifdef USE_AS_STRNCPY
lea -16(%r8), %r8
# endif
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
jmp L(CopyFrom1To16Bytes)
/* L(Shl1): reached from the dispatch on (%rcx & 15) when that value is 1;
   %rdx is 16-byte aligned at that point.
   The source is read with aligned 16-byte loads at -1(%rcx), 15(%rcx), ...
   Two neighbouring loads are merged with palignr $1 so the result holds
   the 16 source bytes starting at (%rcx), ready for an aligned store to
   (%rdx).
   %xmm0 must be all-zero on entry; it stays zero on every fall-through
   because a non-zero pcmpeqb mask branches to L(Shl1LoopExit).
   Under USE_AS_STRNCPY, %r8 is reduced per block and a borrow or zero
   result leaves through labels defined outside this chunk.  */
.p2align 4
L(Shl1):
movaps -1(%rcx), %xmm1
movaps 15(%rcx), %xmm2
/* Also entered from L(Shl1LoopStart), with %xmm1/%xmm2 already loaded,
   once a 64-byte group is known to contain a NUL.  */
L(Shl1Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
/* Keep the unshifted block: palignr below overwrites %xmm2.  */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit1Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl1LoopExit)
palignr $1, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 31(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit1Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl1LoopExit)
palignr $1, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 31(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit1Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl1LoopExit)
palignr $1, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 31(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit1Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl1LoopExit)
palignr $1, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Four blocks stored without a NUL.  Round the next aligned source block
   (31(%rcx)) down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx is moved back by the same distance and, under
   USE_AS_STRNCPY, the distance is added back to %r8.  */
lea 31(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -15(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* Block preceding the first loop block; it supplies the low bytes of the
   first merge.  */
movaps -1(%rcx), %xmm1
/* 64 bytes loop */
.p2align 4
L(Shl1LoopStart):
movaps 15(%rcx), %xmm2
movaps 31(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 47(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 63(%rcx), %xmm5
/* Byte-wise minimum of the four blocks ends up in %xmm7; a zero byte in
   any block produces a mask bit after the compare with the zero %xmm0.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Save the raw last block; it becomes %xmm1 for the next iteration.  */
movaps %xmm5, %xmm7
palignr $1, %xmm4, %xmm5
test %rax, %rax
/* palignr leaves the flags alone, so jnz still acts on the test.  */
palignr $1, %xmm3, %xmm4
jnz L(Shl1Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave1)
# endif
palignr $1, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $1, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl1LoopStart)
/* NUL found in the block at 15(%rcx).  Copy the 16 bytes in front of that
   block with unaligned moves and set %rsi = 15, the offset of that block
   from %rcx, before handing off to L(CopyFrom1To16Bytes) (defined outside
   this chunk; presumably it finishes the copy from %rax and %rsi).  */
L(Shl1LoopExit):
movdqu -1(%rcx), %xmm1
mov $15, %rsi
movdqu %xmm1, -1(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl2): reached from the dispatch on (%rcx & 15) when that value is 2;
   %rdx is 16-byte aligned at that point.
   The source is read with aligned 16-byte loads at -2(%rcx), 14(%rcx), ...
   Two neighbouring loads are merged with palignr $2 so the result holds
   the 16 source bytes starting at (%rcx), ready for an aligned store to
   (%rdx).
   %xmm0 must be all-zero on entry; it stays zero on every fall-through
   because a non-zero pcmpeqb mask branches to L(Shl2LoopExit).
   Under USE_AS_STRNCPY, %r8 is reduced per block and a borrow or zero
   result leaves through labels defined outside this chunk.  */
.p2align 4
L(Shl2):
movaps -2(%rcx), %xmm1
movaps 14(%rcx), %xmm2
/* Also entered from L(Shl2LoopStart), with %xmm1/%xmm2 already loaded,
   once a 64-byte group is known to contain a NUL.  */
L(Shl2Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
/* Keep the unshifted block: palignr below overwrites %xmm2.  */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit2Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl2LoopExit)
palignr $2, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 30(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit2Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl2LoopExit)
palignr $2, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 30(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit2Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl2LoopExit)
palignr $2, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 30(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit2Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl2LoopExit)
palignr $2, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Four blocks stored without a NUL.  Round the next aligned source block
   (30(%rcx)) down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx is moved back by the same distance and, under
   USE_AS_STRNCPY, the distance is added back to %r8.  */
lea 30(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -14(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* Block preceding the first loop block; it supplies the low bytes of the
   first merge.  */
movaps -2(%rcx), %xmm1
/* 64 bytes loop */
.p2align 4
L(Shl2LoopStart):
movaps 14(%rcx), %xmm2
movaps 30(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 46(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 62(%rcx), %xmm5
/* Byte-wise minimum of the four blocks ends up in %xmm7; a zero byte in
   any block produces a mask bit after the compare with the zero %xmm0.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Save the raw last block; it becomes %xmm1 for the next iteration.  */
movaps %xmm5, %xmm7
palignr $2, %xmm4, %xmm5
test %rax, %rax
/* palignr leaves the flags alone, so jnz still acts on the test.  */
palignr $2, %xmm3, %xmm4
jnz L(Shl2Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave2)
# endif
palignr $2, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $2, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl2LoopStart)
/* NUL found in the block at 14(%rcx).  Copy the 16 bytes in front of that
   block with unaligned moves and set %rsi = 14, the offset of that block
   from %rcx, before handing off to L(CopyFrom1To16Bytes) (defined outside
   this chunk; presumably it finishes the copy from %rax and %rsi).  */
L(Shl2LoopExit):
movdqu -2(%rcx), %xmm1
mov $14, %rsi
movdqu %xmm1, -2(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl3): reached from the dispatch on (%rcx & 15) when that value is 3;
   %rdx is 16-byte aligned at that point.
   The source is read with aligned 16-byte loads at -3(%rcx), 13(%rcx), ...
   Two neighbouring loads are merged with palignr $3 so the result holds
   the 16 source bytes starting at (%rcx), ready for an aligned store to
   (%rdx).
   %xmm0 must be all-zero on entry; it stays zero on every fall-through
   because a non-zero pcmpeqb mask branches to L(Shl3LoopExit).
   Under USE_AS_STRNCPY, %r8 is reduced per block and a borrow or zero
   result leaves through labels defined outside this chunk.  */
.p2align 4
L(Shl3):
movaps -3(%rcx), %xmm1
movaps 13(%rcx), %xmm2
/* Also entered from L(Shl3LoopStart), with %xmm1/%xmm2 already loaded,
   once a 64-byte group is known to contain a NUL.  */
L(Shl3Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
/* Keep the unshifted block: palignr below overwrites %xmm2.  */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit3Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl3LoopExit)
palignr $3, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 29(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit3Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl3LoopExit)
palignr $3, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 29(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit3Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl3LoopExit)
palignr $3, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 29(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit3Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl3LoopExit)
palignr $3, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Four blocks stored without a NUL.  Round the next aligned source block
   (29(%rcx)) down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx is moved back by the same distance and, under
   USE_AS_STRNCPY, the distance is added back to %r8.  */
lea 29(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -13(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* Block preceding the first loop block; it supplies the low bytes of the
   first merge.  */
movaps -3(%rcx), %xmm1
/* 64 bytes loop */
.p2align 4
L(Shl3LoopStart):
movaps 13(%rcx), %xmm2
movaps 29(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 45(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 61(%rcx), %xmm5
/* Byte-wise minimum of the four blocks ends up in %xmm7; a zero byte in
   any block produces a mask bit after the compare with the zero %xmm0.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Save the raw last block; it becomes %xmm1 for the next iteration.  */
movaps %xmm5, %xmm7
palignr $3, %xmm4, %xmm5
test %rax, %rax
/* palignr leaves the flags alone, so jnz still acts on the test.  */
palignr $3, %xmm3, %xmm4
jnz L(Shl3Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave3)
# endif
palignr $3, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $3, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl3LoopStart)
/* NUL found in the block at 13(%rcx).  Copy the 16 bytes in front of that
   block with unaligned moves and set %rsi = 13, the offset of that block
   from %rcx, before handing off to L(CopyFrom1To16Bytes) (defined outside
   this chunk; presumably it finishes the copy from %rax and %rsi).  */
L(Shl3LoopExit):
movdqu -3(%rcx), %xmm1
mov $13, %rsi
movdqu %xmm1, -3(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl4): reached from the dispatch on (%rcx & 15) when that value is 4;
   %rdx is 16-byte aligned at that point.
   The source is read with aligned 16-byte loads at -4(%rcx), 12(%rcx), ...
   Two neighbouring loads are merged with palignr $4 so the result holds
   the 16 source bytes starting at (%rcx), ready for an aligned store to
   (%rdx).
   %xmm0 must be all-zero on entry; it stays zero on every fall-through
   because a non-zero pcmpeqb mask branches to L(Shl4LoopExit).
   Under USE_AS_STRNCPY, %r8 is reduced per block and a borrow or zero
   result leaves through labels defined outside this chunk.  */
.p2align 4
L(Shl4):
movaps -4(%rcx), %xmm1
movaps 12(%rcx), %xmm2
/* Also entered from L(Shl4LoopStart), with %xmm1/%xmm2 already loaded,
   once a 64-byte group is known to contain a NUL.  */
L(Shl4Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
/* Keep the unshifted block: palignr below overwrites %xmm2.  */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit4Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit4Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit4Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit4Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Four blocks stored without a NUL.  Round the next aligned source block
   (28(%rcx)) down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx is moved back by the same distance and, under
   USE_AS_STRNCPY, the distance is added back to %r8.  */
lea 28(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -12(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* Block preceding the first loop block; it supplies the low bytes of the
   first merge.  */
movaps -4(%rcx), %xmm1
/* 64 bytes loop */
.p2align 4
L(Shl4LoopStart):
movaps 12(%rcx), %xmm2
movaps 28(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 44(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 60(%rcx), %xmm5
/* Byte-wise minimum of the four blocks ends up in %xmm7; a zero byte in
   any block produces a mask bit after the compare with the zero %xmm0.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Save the raw last block; it becomes %xmm1 for the next iteration.  */
movaps %xmm5, %xmm7
palignr $4, %xmm4, %xmm5
test %rax, %rax
/* palignr leaves the flags alone, so jnz still acts on the test.  */
palignr $4, %xmm3, %xmm4
jnz L(Shl4Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave4)
# endif
palignr $4, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl4LoopStart)
/* NUL found in the block at 12(%rcx).  Copy the 16 bytes in front of that
   block with unaligned moves and set %rsi = 12, the offset of that block
   from %rcx, before handing off to L(CopyFrom1To16Bytes) (defined outside
   this chunk; presumably it finishes the copy from %rax and %rsi).  */
L(Shl4LoopExit):
movdqu -4(%rcx), %xmm1
mov $12, %rsi
movdqu %xmm1, -4(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl5): reached from the dispatch on (%rcx & 15) when that value is 5;
   %rdx is 16-byte aligned at that point.
   The source is read with aligned 16-byte loads at -5(%rcx), 11(%rcx), ...
   Two neighbouring loads are merged with palignr $5 so the result holds
   the 16 source bytes starting at (%rcx), ready for an aligned store to
   (%rdx).
   %xmm0 must be all-zero on entry; it stays zero on every fall-through
   because a non-zero pcmpeqb mask branches to L(Shl5LoopExit).
   Under USE_AS_STRNCPY, %r8 is reduced per block and a borrow or zero
   result leaves through labels defined outside this chunk.  */
.p2align 4
L(Shl5):
movaps -5(%rcx), %xmm1
movaps 11(%rcx), %xmm2
/* Also entered from L(Shl5LoopStart), with %xmm1/%xmm2 already loaded,
   once a 64-byte group is known to contain a NUL.  */
L(Shl5Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
/* Keep the unshifted block: palignr below overwrites %xmm2.  */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit5Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl5LoopExit)
palignr $5, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 27(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit5Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl5LoopExit)
palignr $5, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 27(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit5Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl5LoopExit)
palignr $5, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 27(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit5Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl5LoopExit)
palignr $5, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Four blocks stored without a NUL.  Round the next aligned source block
   (27(%rcx)) down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx is moved back by the same distance and, under
   USE_AS_STRNCPY, the distance is added back to %r8.  */
lea 27(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -11(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* Block preceding the first loop block; it supplies the low bytes of the
   first merge.  */
movaps -5(%rcx), %xmm1
/* 64 bytes loop */
.p2align 4
L(Shl5LoopStart):
movaps 11(%rcx), %xmm2
movaps 27(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 43(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 59(%rcx), %xmm5
/* Byte-wise minimum of the four blocks ends up in %xmm7; a zero byte in
   any block produces a mask bit after the compare with the zero %xmm0.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Save the raw last block; it becomes %xmm1 for the next iteration.  */
movaps %xmm5, %xmm7
palignr $5, %xmm4, %xmm5
test %rax, %rax
/* palignr leaves the flags alone, so jnz still acts on the test.  */
palignr $5, %xmm3, %xmm4
jnz L(Shl5Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave5)
# endif
palignr $5, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $5, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl5LoopStart)
/* NUL found in the block at 11(%rcx).  Copy the 16 bytes in front of that
   block with unaligned moves and set %rsi = 11, the offset of that block
   from %rcx, before handing off to L(CopyFrom1To16Bytes) (defined outside
   this chunk; presumably it finishes the copy from %rax and %rsi).  */
L(Shl5LoopExit):
movdqu -5(%rcx), %xmm1
mov $11, %rsi
movdqu %xmm1, -5(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl6): reached from the dispatch on (%rcx & 15) when that value is 6;
   %rdx is 16-byte aligned at that point.
   The source is read with aligned 16-byte loads at -6(%rcx), 10(%rcx), ...
   Two neighbouring loads are merged with palignr $6 so the result holds
   the 16 source bytes starting at (%rcx), ready for an aligned store to
   (%rdx).
   %xmm0 must be all-zero on entry; it stays zero on every fall-through
   because a non-zero pcmpeqb mask branches to L(Shl6LoopExit).
   Under USE_AS_STRNCPY, %r8 is reduced per block and a borrow or zero
   result leaves through labels defined outside this chunk.  */
.p2align 4
L(Shl6):
movaps -6(%rcx), %xmm1
movaps 10(%rcx), %xmm2
/* Also entered from L(Shl6LoopStart), with %xmm1/%xmm2 already loaded,
   once a 64-byte group is known to contain a NUL.  */
L(Shl6Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
/* Keep the unshifted block: palignr below overwrites %xmm2.  */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit6Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl6LoopExit)
palignr $6, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 26(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit6Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl6LoopExit)
palignr $6, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 26(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit6Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl6LoopExit)
palignr $6, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 26(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit6Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl6LoopExit)
palignr $6, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Four blocks stored without a NUL.  Round the next aligned source block
   (26(%rcx)) down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx is moved back by the same distance and, under
   USE_AS_STRNCPY, the distance is added back to %r8.  */
lea 26(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -10(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* Block preceding the first loop block; it supplies the low bytes of the
   first merge.  */
movaps -6(%rcx), %xmm1
/* 64 bytes loop */
.p2align 4
L(Shl6LoopStart):
movaps 10(%rcx), %xmm2
movaps 26(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 42(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 58(%rcx), %xmm5
/* Byte-wise minimum of the four blocks ends up in %xmm7; a zero byte in
   any block produces a mask bit after the compare with the zero %xmm0.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Save the raw last block; it becomes %xmm1 for the next iteration.  */
movaps %xmm5, %xmm7
palignr $6, %xmm4, %xmm5
test %rax, %rax
/* palignr leaves the flags alone, so jnz still acts on the test.  */
palignr $6, %xmm3, %xmm4
jnz L(Shl6Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave6)
# endif
palignr $6, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $6, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl6LoopStart)
/* NUL found in the block at 10(%rcx).  The 10 bytes in front of that
   block are copied with an 8-byte move at offset 0 and an overlapping
   4-byte move at offset 6 (%esi is only a temporary here).  %rsi is then
   set to 10, the offset of that block from %rcx, before handing off to
   L(CopyFrom1To16Bytes) (defined outside this chunk; presumably it
   finishes the copy from %rax and %rsi).  */
L(Shl6LoopExit):
mov (%rcx), %r9
mov 6(%rcx), %esi
mov %r9, (%rdx)
mov %esi, 6(%rdx)
mov $10, %rsi
jmp L(CopyFrom1To16Bytes)
/* L(Shl7): reached from the dispatch on (%rcx & 15) when that value is 7;
   %rdx is 16-byte aligned at that point.
   The source is read with aligned 16-byte loads at -7(%rcx), 9(%rcx), ...
   Two neighbouring loads are merged with palignr $7 so the result holds
   the 16 source bytes starting at (%rcx), ready for an aligned store to
   (%rdx).
   %xmm0 must be all-zero on entry; it stays zero on every fall-through
   because a non-zero pcmpeqb mask branches to L(Shl7LoopExit).
   Under USE_AS_STRNCPY, %r8 is reduced per block and a borrow or zero
   result leaves through labels defined outside this chunk.  */
.p2align 4
L(Shl7):
movaps -7(%rcx), %xmm1
movaps 9(%rcx), %xmm2
/* Also entered from L(Shl7LoopStart), with %xmm1/%xmm2 already loaded,
   once a 64-byte group is known to contain a NUL.  */
L(Shl7Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
/* Keep the unshifted block: palignr below overwrites %xmm2.  */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit7Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl7LoopExit)
palignr $7, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 25(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit7Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl7LoopExit)
palignr $7, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 25(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit7Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl7LoopExit)
palignr $7, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 25(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit7Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl7LoopExit)
palignr $7, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Four blocks stored without a NUL.  Round the next aligned source block
   (25(%rcx)) down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx is moved back by the same distance and, under
   USE_AS_STRNCPY, the distance is added back to %r8.  */
lea 25(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -9(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* Block preceding the first loop block; it supplies the low bytes of the
   first merge.  */
movaps -7(%rcx), %xmm1
/* 64 bytes loop */
.p2align 4
L(Shl7LoopStart):
movaps 9(%rcx), %xmm2
movaps 25(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 41(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 57(%rcx), %xmm5
/* Byte-wise minimum of the four blocks ends up in %xmm7; a zero byte in
   any block produces a mask bit after the compare with the zero %xmm0.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Save the raw last block; it becomes %xmm1 for the next iteration.  */
movaps %xmm5, %xmm7
palignr $7, %xmm4, %xmm5
test %rax, %rax
/* palignr leaves the flags alone, so jnz still acts on the test.  */
palignr $7, %xmm3, %xmm4
jnz L(Shl7Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave7)
# endif
palignr $7, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $7, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl7LoopStart)
/* NUL found in the block at 9(%rcx).  The 9 bytes in front of that block
   are copied with an 8-byte move at offset 0 and an overlapping 4-byte
   move at offset 5 (%esi is only a temporary here).  %rsi is then set to
   9, the offset of that block from %rcx, before handing off to
   L(CopyFrom1To16Bytes) (defined outside this chunk; presumably it
   finishes the copy from %rax and %rsi).  */
L(Shl7LoopExit):
mov (%rcx), %r9
mov 5(%rcx), %esi
mov %r9, (%rdx)
mov %esi, 5(%rdx)
mov $9, %rsi
jmp L(CopyFrom1To16Bytes)
/* L(Shl8): reached from the dispatch on (%rcx & 15) when that value is 8;
   %rdx is 16-byte aligned at that point.
   The source is read with aligned 16-byte loads at -8(%rcx), 8(%rcx), ...
   Two neighbouring loads are merged with palignr $8 so the result holds
   the 16 source bytes starting at (%rcx), ready for an aligned store to
   (%rdx).
   %xmm0 must be all-zero on entry; it stays zero on every fall-through
   because a non-zero pcmpeqb mask branches to L(Shl8LoopExit).
   Under USE_AS_STRNCPY, %r8 is reduced per block and a borrow or zero
   result leaves through labels defined outside this chunk.  */
.p2align 4
L(Shl8):
movaps -8(%rcx), %xmm1
movaps 8(%rcx), %xmm2
/* Also entered from L(Shl8LoopStart), with %xmm1/%xmm2 already loaded,
   once a 64-byte group is known to contain a NUL.  */
L(Shl8Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
/* Keep the unshifted block: palignr below overwrites %xmm2.  */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit8Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit8Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit8Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit8Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Four blocks stored without a NUL.  Round the next aligned source block
   (24(%rcx)) down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx is moved back by the same distance and, under
   USE_AS_STRNCPY, the distance is added back to %r8.  */
lea 24(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -8(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* Block preceding the first loop block; it supplies the low bytes of the
   first merge.  */
movaps -8(%rcx), %xmm1
/* 64 bytes loop */
.p2align 4
L(Shl8LoopStart):
movaps 8(%rcx), %xmm2
movaps 24(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 40(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 56(%rcx), %xmm5
/* Byte-wise minimum of the four blocks ends up in %xmm7; a zero byte in
   any block produces a mask bit after the compare with the zero %xmm0.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Save the raw last block; it becomes %xmm1 for the next iteration.  */
movaps %xmm5, %xmm7
palignr $8, %xmm4, %xmm5
test %rax, %rax
/* palignr leaves the flags alone, so jnz still acts on the test.  */
palignr $8, %xmm3, %xmm4
jnz L(Shl8Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave8)
# endif
palignr $8, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl8LoopStart)
/* NUL found in the block at 8(%rcx).  The 8 bytes in front of that block
   are copied with a single 8-byte move, and %rsi is set to 8, the offset
   of that block from %rcx, before handing off to L(CopyFrom1To16Bytes)
   (defined outside this chunk; presumably it finishes the copy from %rax
   and %rsi).  */
L(Shl8LoopExit):
mov (%rcx), %r9
mov $8, %rsi
mov %r9, (%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl9): reached from the dispatch on (%rcx & 15) when that value is 9;
   %rdx is 16-byte aligned at that point.
   The source is read with aligned 16-byte loads at -9(%rcx), 7(%rcx), ...
   Two neighbouring loads are merged with palignr $9 so the result holds
   the 16 source bytes starting at (%rcx), ready for an aligned store to
   (%rdx).
   %xmm0 must be all-zero on entry; it stays zero on every fall-through
   because a non-zero pcmpeqb mask branches to L(Shl9LoopExit).
   Under USE_AS_STRNCPY, %r8 is reduced per block and a borrow or zero
   result leaves through labels defined outside this chunk.  */
.p2align 4
L(Shl9):
movaps -9(%rcx), %xmm1
movaps 7(%rcx), %xmm2
/* Also entered from L(Shl9LoopStart), with %xmm1/%xmm2 already loaded,
   once a 64-byte group is known to contain a NUL.  */
L(Shl9Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
/* Keep the unshifted block: palignr below overwrites %xmm2.  */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit9Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl9LoopExit)
palignr $9, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 23(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit9Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl9LoopExit)
palignr $9, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 23(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit9Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl9LoopExit)
palignr $9, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 23(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit9Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl9LoopExit)
palignr $9, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Four blocks stored without a NUL.  Round the next aligned source block
   (23(%rcx)) down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx is moved back by the same distance and, under
   USE_AS_STRNCPY, the distance is added back to %r8.  */
lea 23(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -7(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* Block preceding the first loop block; it supplies the low bytes of the
   first merge.  */
movaps -9(%rcx), %xmm1
/* 64 bytes loop */
.p2align 4
L(Shl9LoopStart):
movaps 7(%rcx), %xmm2
movaps 23(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 39(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 55(%rcx), %xmm5
/* Byte-wise minimum of the four blocks ends up in %xmm7; a zero byte in
   any block produces a mask bit after the compare with the zero %xmm0.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Save the raw last block; it becomes %xmm1 for the next iteration.  */
movaps %xmm5, %xmm7
palignr $9, %xmm4, %xmm5
test %rax, %rax
/* palignr leaves the flags alone, so jnz still acts on the test.  */
palignr $9, %xmm3, %xmm4
jnz L(Shl9Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave9)
# endif
palignr $9, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $9, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl9LoopStart)
/* NUL found in the block at 7(%rcx).  The 7 bytes in front of that block
   are covered by one 8-byte move starting at offset -1, and %rsi is set to
   7, the offset of that block from %rcx, before handing off to
   L(CopyFrom1To16Bytes) (defined outside this chunk; presumably it
   finishes the copy from %rax and %rsi).  */
L(Shl9LoopExit):
mov -1(%rcx), %r9
mov $7, %rsi
mov %r9, -1(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl10): reached from the dispatch on (%rcx & 15) when that value is
   10; %rdx is 16-byte aligned at that point.
   The source is read with aligned 16-byte loads at -10(%rcx), 6(%rcx), ...
   Two neighbouring loads are merged with palignr $10 so the result holds
   the 16 source bytes starting at (%rcx), ready for an aligned store to
   (%rdx).
   %xmm0 must be all-zero on entry; it stays zero on every fall-through
   because a non-zero pcmpeqb mask branches to L(Shl10LoopExit).
   Under USE_AS_STRNCPY, %r8 is reduced per block and a borrow or zero
   result leaves through labels defined outside this chunk.  */
.p2align 4
L(Shl10):
movaps -10(%rcx), %xmm1
movaps 6(%rcx), %xmm2
/* Also entered from L(Shl10LoopStart), with %xmm1/%xmm2 already loaded,
   once a 64-byte group is known to contain a NUL.  */
L(Shl10Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
/* Keep the unshifted block: palignr below overwrites %xmm2.  */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit10Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl10LoopExit)
palignr $10, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 22(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit10Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl10LoopExit)
palignr $10, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 22(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit10Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl10LoopExit)
palignr $10, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 22(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit10Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl10LoopExit)
palignr $10, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Four blocks stored without a NUL.  Round the next aligned source block
   (22(%rcx)) down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx is moved back by the same distance and, under
   USE_AS_STRNCPY, the distance is added back to %r8.  */
lea 22(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -6(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* Block preceding the first loop block; it supplies the low bytes of the
   first merge.  */
movaps -10(%rcx), %xmm1
/* 64 bytes loop */
.p2align 4
L(Shl10LoopStart):
movaps 6(%rcx), %xmm2
movaps 22(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 38(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 54(%rcx), %xmm5
/* Byte-wise minimum of the four blocks ends up in %xmm7; a zero byte in
   any block produces a mask bit after the compare with the zero %xmm0.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Save the raw last block; it becomes %xmm1 for the next iteration.  */
movaps %xmm5, %xmm7
palignr $10, %xmm4, %xmm5
test %rax, %rax
/* palignr leaves the flags alone, so jnz still acts on the test.  */
palignr $10, %xmm3, %xmm4
jnz L(Shl10Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave10)
# endif
palignr $10, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $10, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl10LoopStart)
/* NUL found in the block at 6(%rcx).  The 6 bytes in front of that block
   are covered by one 8-byte move starting at offset -2, and %rsi is set to
   6, the offset of that block from %rcx, before handing off to
   L(CopyFrom1To16Bytes) (defined outside this chunk; presumably it
   finishes the copy from %rax and %rsi).  */
L(Shl10LoopExit):
mov -2(%rcx), %r9
mov $6, %rsi
mov %r9, -2(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl11): reached from the dispatch on (%rcx & 15) when that value is
   11; %rdx is 16-byte aligned at that point.
   The source is read with aligned 16-byte loads at -11(%rcx), 5(%rcx), ...
   Two neighbouring loads are merged with palignr $11 so the result holds
   the 16 source bytes starting at (%rcx), ready for an aligned store to
   (%rdx).
   %xmm0 must be all-zero on entry; it stays zero on every fall-through
   because a non-zero pcmpeqb mask branches to L(Shl11LoopExit).
   Under USE_AS_STRNCPY, %r8 is reduced per block and a borrow or zero
   result leaves through labels defined outside this chunk.  */
.p2align 4
L(Shl11):
movaps -11(%rcx), %xmm1
movaps 5(%rcx), %xmm2
/* Also entered from L(Shl11LoopStart), with %xmm1/%xmm2 already loaded,
   once a 64-byte group is known to contain a NUL.  */
L(Shl11Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
/* Keep the unshifted block: palignr below overwrites %xmm2.  */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit11Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl11LoopExit)
palignr $11, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 21(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit11Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl11LoopExit)
palignr $11, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 21(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit11Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl11LoopExit)
palignr $11, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 21(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit11Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl11LoopExit)
palignr $11, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Four blocks stored without a NUL.  Round the next aligned source block
   (21(%rcx)) down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx is moved back by the same distance and, under
   USE_AS_STRNCPY, the distance is added back to %r8.  */
lea 21(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -5(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* Block preceding the first loop block; it supplies the low bytes of the
   first merge.  */
movaps -11(%rcx), %xmm1
/* 64 bytes loop */
.p2align 4
L(Shl11LoopStart):
movaps 5(%rcx), %xmm2
movaps 21(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 37(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 53(%rcx), %xmm5
/* Byte-wise minimum of the four blocks ends up in %xmm7; a zero byte in
   any block produces a mask bit after the compare with the zero %xmm0.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Save the raw last block; it becomes %xmm1 for the next iteration.  */
movaps %xmm5, %xmm7
palignr $11, %xmm4, %xmm5
test %rax, %rax
/* palignr leaves the flags alone, so jnz still acts on the test.  */
palignr $11, %xmm3, %xmm4
jnz L(Shl11Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave11)
# endif
palignr $11, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $11, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl11LoopStart)
/* NUL found in the block at 5(%rcx).  The 5 bytes in front of that block
   are covered by one 8-byte move starting at offset -3, and %rsi is set to
   5, the offset of that block from %rcx, before handing off to
   L(CopyFrom1To16Bytes) (defined outside this chunk; presumably it
   finishes the copy from %rax and %rsi).  */
L(Shl11LoopExit):
mov -3(%rcx), %r9
mov $5, %rsi
mov %r9, -3(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl12): reached from the dispatch on (%rcx & 15) when that value is
   12; %rdx is 16-byte aligned at that point.
   The source is read with aligned 16-byte loads at -12(%rcx), 4(%rcx), ...
   Two neighbouring loads are merged with palignr $12 so the result holds
   the 16 source bytes starting at (%rcx), ready for an aligned store to
   (%rdx).
   %xmm0 must be all-zero on entry; it stays zero on every fall-through
   because a non-zero pcmpeqb mask branches to L(Shl12LoopExit).
   Under USE_AS_STRNCPY, %r8 is reduced per block and a borrow or zero
   result leaves through labels defined outside this chunk.  */
.p2align 4
L(Shl12):
movaps -12(%rcx), %xmm1
movaps 4(%rcx), %xmm2
/* Also entered from L(Shl12LoopStart), with %xmm1/%xmm2 already loaded,
   once a 64-byte group is known to contain a NUL.  */
L(Shl12Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
/* Keep the unshifted block: palignr below overwrites %xmm2.  */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit12Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit12Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit12Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit12Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Four blocks stored without a NUL.  Round the next aligned source block
   (20(%rcx)) down to a 64-byte boundary; %rax is the distance stepped
   back.  %rdx is moved back by the same distance and, under
   USE_AS_STRNCPY, the distance is added back to %r8.  */
lea 20(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -4(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* Block preceding the first loop block; it supplies the low bytes of the
   first merge.  */
movaps -12(%rcx), %xmm1
/* 64 bytes loop */
.p2align 4
L(Shl12LoopStart):
movaps 4(%rcx), %xmm2
movaps 20(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 36(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 52(%rcx), %xmm5
/* Byte-wise minimum of the four blocks ends up in %xmm7; a zero byte in
   any block produces a mask bit after the compare with the zero %xmm0.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Save the raw last block; it becomes %xmm1 for the next iteration.  */
movaps %xmm5, %xmm7
palignr $12, %xmm4, %xmm5
test %rax, %rax
/* palignr leaves the flags alone, so jnz still acts on the test.  */
palignr $12, %xmm3, %xmm4
jnz L(Shl12Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave12)
# endif
palignr $12, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl12LoopStart)
/* NUL found in the block at 4(%rcx).  The 4 bytes in front of that block
   are copied with a single 4-byte move, and %rsi is set to 4, the offset
   of that block from %rcx, before handing off to L(CopyFrom1To16Bytes)
   (defined outside this chunk; presumably it finishes the copy from %rax
   and %rsi).  */
L(Shl12LoopExit):
mov (%rcx), %r9d
mov $4, %rsi
mov %r9d, (%rdx)
jmp L(CopyFrom1To16Bytes)
/* Shl13: copy path used when the source cursor %rcx sits 13 bytes past a
   16-byte boundary (the movaps loads below require %rcx - 13 and %rcx + 3
   to be aligned) and the destination cursor %rdx is 16-byte aligned (it is
   written with movaps).  Every aligned destination store is assembled from
   two neighbouring aligned source blocks with palignr $13.
   %xmm0 serves as the zero vector for NUL detection; it is presumably
   all-zero on entry -- TODO confirm against the dispatch code, which is
   outside this chunk.  Under USE_AS_STRNCPY, %r8 is the remaining byte
   budget.  */
.p2align 4
L(Shl13):
movaps -13(%rcx), %xmm1
movaps 3(%rcx), %xmm2
/* Also re-entered from the 64-byte loop below once a NUL was seen somewhere
   in a 64-byte chunk; the four unrolled 16-byte steps then locate the exact
   block.  */
L(Shl13Start):
/* %rax = bitmask of NUL bytes in the newest block.  As long as the mask is
   zero, pcmpeqb leaves %xmm0 all-zero again for the next compare.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Budget ends inside this block: leave through the strncpy exit path.  */
sub $16, %r8
jbe L(StrncpyExit13Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl13LoopExit)
/* Step 1: previous block in %xmm1, newest in %xmm2.  */
palignr $13, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 19(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit13Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl13LoopExit)
/* Step 2: previous block in %xmm3, newest in %xmm2.  */
palignr $13, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 19(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit13Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl13LoopExit)
/* Step 3: previous block in %xmm1, newest in %xmm2.  */
palignr $13, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 19(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit13Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl13LoopExit)
/* Step 4: previous block in %xmm3, newest in %xmm2.  */
palignr $13, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Prepare the 64-byte loop: take the address of the next aligned source
   block, round it down to a 64-byte boundary and move the destination (and
   the strncpy budget) back by the same distance %rax.  Bytes in that
   distance are simply copied again.  Afterwards 3(%rcx) is 64-byte aligned
   and %xmm1 holds the aligned block just before it.  */
lea 19(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -3(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
movaps -13(%rcx), %xmm1
/* 64-byte loop: load four aligned source blocks, detect a NUL in any of
   them with a byte-wise minimum, and store four shifted blocks per
   iteration.  */
.p2align 4
L(Shl13LoopStart):
movaps 3(%rcx), %xmm2
movaps 19(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 35(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 51(%rcx), %xmm5
/* %xmm7 = byte-wise minimum of the four blocks; it contains a zero byte
   iff at least one block does.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Keep an unshifted copy of the last block; it becomes the next
   iteration's "previous block".  */
movaps %xmm5, %xmm7
palignr $13, %xmm4, %xmm5
test %rax, %rax
palignr $13, %xmm3, %xmm4
/* NUL somewhere in this chunk: nothing has been stored yet, redo it in
   16-byte steps (%xmm1 and %xmm2 are still the raw blocks).  */
jnz L(Shl13Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave13)
# endif
palignr $13, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $13, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl13LoopStart)
/* A NUL lies in the block whose mask is in %rax.  Three source bytes
   precede that block and have not been stored yet; they are copied with one
   4-byte move that starts one byte early (rewriting a byte that was already
   stored).  %rsi = 3 tells the tail copy where to continue.  */
L(Shl13LoopExit):
mov -1(%rcx), %r9d
mov $3, %rsi
mov %r9d, -1(%rdx)
jmp L(CopyFrom1To16Bytes)
/* Shl14: copy path used when the source cursor %rcx sits 14 bytes past a
   16-byte boundary (the movaps loads below require %rcx - 14 and %rcx + 2
   to be aligned) and the destination cursor %rdx is 16-byte aligned (it is
   written with movaps).  Every aligned destination store is assembled from
   two neighbouring aligned source blocks with palignr $14.
   %xmm0 serves as the zero vector for NUL detection; it is presumably
   all-zero on entry -- TODO confirm against the dispatch code, which is
   outside this chunk.  Under USE_AS_STRNCPY, %r8 is the remaining byte
   budget.  */
.p2align 4
L(Shl14):
movaps -14(%rcx), %xmm1
movaps 2(%rcx), %xmm2
/* Also re-entered from the 64-byte loop below once a NUL was seen somewhere
   in a 64-byte chunk; the four unrolled 16-byte steps then locate the exact
   block.  */
L(Shl14Start):
/* %rax = bitmask of NUL bytes in the newest block.  As long as the mask is
   zero, pcmpeqb leaves %xmm0 all-zero again for the next compare.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Budget ends inside this block: leave through the strncpy exit path.  */
sub $16, %r8
jbe L(StrncpyExit14Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl14LoopExit)
/* Step 1: previous block in %xmm1, newest in %xmm2.  */
palignr $14, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 18(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit14Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl14LoopExit)
/* Step 2: previous block in %xmm3, newest in %xmm2.  */
palignr $14, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 18(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit14Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl14LoopExit)
/* Step 3: previous block in %xmm1, newest in %xmm2.  */
palignr $14, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 18(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit14Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl14LoopExit)
/* Step 4: previous block in %xmm3, newest in %xmm2.  */
palignr $14, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Prepare the 64-byte loop: take the address of the next aligned source
   block, round it down to a 64-byte boundary and move the destination (and
   the strncpy budget) back by the same distance %rax.  Bytes in that
   distance are simply copied again.  Afterwards 2(%rcx) is 64-byte aligned
   and %xmm1 holds the aligned block just before it.  */
lea 18(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -2(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
movaps -14(%rcx), %xmm1
/* 64-byte loop: load four aligned source blocks, detect a NUL in any of
   them with a byte-wise minimum, and store four shifted blocks per
   iteration.  */
.p2align 4
L(Shl14LoopStart):
movaps 2(%rcx), %xmm2
movaps 18(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 34(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 50(%rcx), %xmm5
/* %xmm7 = byte-wise minimum of the four blocks; it contains a zero byte
   iff at least one block does.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Keep an unshifted copy of the last block; it becomes the next
   iteration's "previous block".  */
movaps %xmm5, %xmm7
palignr $14, %xmm4, %xmm5
test %rax, %rax
palignr $14, %xmm3, %xmm4
/* NUL somewhere in this chunk: nothing has been stored yet, redo it in
   16-byte steps (%xmm1 and %xmm2 are still the raw blocks).  */
jnz L(Shl14Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave14)
# endif
palignr $14, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $14, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl14LoopStart)
/* A NUL lies in the block whose mask is in %rax.  Two source bytes precede
   that block and have not been stored yet; they are copied with one 4-byte
   move that starts two bytes early (rewriting bytes that were already
   stored).  %rsi = 2 tells the tail copy where to continue.  */
L(Shl14LoopExit):
mov -2(%rcx), %r9d
mov $2, %rsi
mov %r9d, -2(%rdx)
jmp L(CopyFrom1To16Bytes)
/* Shl15: copy path used when the source cursor %rcx sits 15 bytes past a
   16-byte boundary (the movaps loads below require %rcx - 15 and %rcx + 1
   to be aligned) and the destination cursor %rdx is 16-byte aligned (it is
   written with movaps).  Every aligned destination store is assembled from
   two neighbouring aligned source blocks with palignr $15.
   %xmm0 serves as the zero vector for NUL detection; it is presumably
   all-zero on entry -- TODO confirm against the dispatch code, which is
   outside this chunk.  Under USE_AS_STRNCPY, %r8 is the remaining byte
   budget.  */
.p2align 4
L(Shl15):
movaps -15(%rcx), %xmm1
movaps 1(%rcx), %xmm2
/* Also re-entered from the 64-byte loop below once a NUL was seen somewhere
   in a 64-byte chunk; the four unrolled 16-byte steps then locate the exact
   block.  */
L(Shl15Start):
/* %rax = bitmask of NUL bytes in the newest block.  As long as the mask is
   zero, pcmpeqb leaves %xmm0 all-zero again for the next compare.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Budget ends inside this block: leave through the strncpy exit path.  */
sub $16, %r8
jbe L(StrncpyExit15Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl15LoopExit)
/* Step 1: previous block in %xmm1, newest in %xmm2.  */
palignr $15, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 17(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit15Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl15LoopExit)
/* Step 2: previous block in %xmm3, newest in %xmm2.  */
palignr $15, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 17(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit15Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl15LoopExit)
/* Step 3: previous block in %xmm1, newest in %xmm2.  */
palignr $15, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 17(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit15Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl15LoopExit)
/* Step 4: previous block in %xmm3, newest in %xmm2.  */
palignr $15, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Prepare the 64-byte loop: take the address of the next aligned source
   block, round it down to a 64-byte boundary and move the destination (and
   the strncpy budget) back by the same distance %rax.  Bytes in that
   distance are simply copied again.  Afterwards 1(%rcx) is 64-byte aligned
   and %xmm1 holds the aligned block just before it.  */
lea 17(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -1(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
movaps -15(%rcx), %xmm1
/* 64-byte loop: load four aligned source blocks, detect a NUL in any of
   them with a byte-wise minimum, and store four shifted blocks per
   iteration.  */
.p2align 4
L(Shl15LoopStart):
movaps 1(%rcx), %xmm2
movaps 17(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 33(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 49(%rcx), %xmm5
/* %xmm7 = byte-wise minimum of the four blocks; it contains a zero byte
   iff at least one block does.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Keep an unshifted copy of the last block; it becomes the next
   iteration's "previous block".  */
movaps %xmm5, %xmm7
palignr $15, %xmm4, %xmm5
test %rax, %rax
palignr $15, %xmm3, %xmm4
/* NUL somewhere in this chunk: nothing has been stored yet, redo it in
   16-byte steps (%xmm1 and %xmm2 are still the raw blocks).  */
jnz L(Shl15Start)
# ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(StrncpyLeave15)
# endif
palignr $15, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $15, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl15LoopStart)
/* A NUL lies in the block whose mask is in %rax.  One source byte precedes
   that block and has not been stored yet; it is copied with one 4-byte move
   that starts three bytes early (rewriting bytes that were already stored).
   %rsi = 1 tells the tail copy where to continue.  */
L(Shl15LoopExit):
mov -3(%rcx), %r9d
mov $1, %rsi
mov %r9d, -3(%rdx)
/* In the USE_AS_STRCAT build L(CopyFrom1To16Bytes) is not emitted right
   below (see the ifndef that follows), so an explicit jump is needed; in
   the other builds execution falls through into it.  */
# ifdef USE_AS_STRCAT
jmp L(CopyFrom1To16Bytes)
# endif
# ifndef USE_AS_STRCAT
/* CopyFrom1To16Bytes: final copy once a NUL has been located.
   In:  %rax = pmovmskb mask of NUL bytes in the current 16-byte block,
        %rsi = byte offset of that block relative to %rcx (source) and
               %rdx (destination).
   Dispatches to L(ExitN), where N - 1 is the index of the lowest set mask
   bit, i.e. N bytes including the NUL are copied.  */
.p2align 4
L(CopyFrom1To16Bytes):
# ifdef USE_AS_STRNCPY
/* Add 16 back to the budget in %r8; the callers visible in this file
   subtracted 16 for this block before branching here.  */
add $16, %r8
# endif
add %rsi, %rdx
add %rsi, %rcx
/* No NUL among bytes 0..7: inspect the upper half of the mask.  */
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
test $0x08, %al
jnz L(Exit4)
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
jnz L(Exit6)
test $0x40, %al
jnz L(Exit7)
/* %al is non-zero but bits 0..6 are clear, so bit 7 is set: fall through
   into L(Exit8).  */
/* Exit8: copy the final 8 bytes from (%rcx) to (%rdx) and return.
   Also reached by fall-through from L(CopyFrom1To16Bytes).
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit8):
mov (%rcx), %rax
mov %rax, (%rdx)
# ifdef USE_AS_STPCPY
lea 7(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge the 8 bytes to the budget.  lea does not modify flags, so jnz
   still tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $8, %r8
lea 8(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* ExitHigh: second half of the NUL-mask dispatch, entered when %al is zero.
   %ah holds mask bits 8..15; branch to L(ExitN) for the lowest set bit
   (N = 9..15).  If none of bits 8..14 is set, fall through into L(Exit16).  */
.p2align 4
L(ExitHigh):
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
test $0x08, %ah
jnz L(Exit12)
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
jnz L(Exit14)
test $0x40, %ah
jnz L(Exit15)
/* Exit16: copy the final 16 bytes from (%rcx) to (%rdx) with two 8-byte
   moves and return.  Also reached by fall-through from L(ExitHigh).
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit16):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 8(%rcx), %rax
mov %rax, 8(%rdx)
# ifdef USE_AS_STPCPY
lea 15(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge the 16 bytes to the budget.  lea does not modify flags, so jnz
   still tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $16, %r8
lea 16(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
# ifdef USE_AS_STRNCPY
/* CopyFrom1To16BytesCase2 (strncpy builds): the current 16-byte block
   contains a NUL (mask in %rax is non-zero) and the byte budget also ends
   within it.  Whichever comes first decides how many bytes are copied.
   In:  %rax = NUL mask, %rsi = block offset relative to %rcx / %rdx,
        %r8 = budget after 16 was subtracted for this block.  */
.p2align 4
L(CopyFrom1To16BytesCase2):
add $16, %r8
add %rsi, %rcx
/* %rsi temporarily holds the adjusted destination while %rdx is scratch.  */
lea (%rsi, %rdx), %rsi
/* %dh = bit 15 of (%r8 - 9), set when that value is negative (budget of 8
   or less), OR-ed with %al (a NUL among bytes 0..7).  %dh is zero only
   when both the budget limit and the NUL lie in bytes 8..15.
   NOTE(review): relies on %r8 being in 1..16 here -- confirm callers.  */
lea -9(%r8), %rdx
and $1<<7, %dh
or %al, %dh
test %dh, %dh
/* lea keeps the flags from the test above for the jz below.  */
lea (%rsi), %rdx
jz L(ExitHighCase2)
/* For N = 1..7: stop at N bytes when the budget is exactly N or when byte
   N - 1 is the NUL.  */
cmp $1, %r8
je L(Exit1)
test $0x01, %al
jnz L(Exit1)
cmp $2, %r8
je L(Exit2)
test $0x02, %al
jnz L(Exit2)
cmp $3, %r8
je L(Exit3)
test $0x04, %al
jnz L(Exit3)
cmp $4, %r8
je L(Exit4)
test $0x08, %al
jnz L(Exit4)
cmp $5, %r8
je L(Exit5)
test $0x10, %al
jnz L(Exit5)
cmp $6, %r8
je L(Exit6)
test $0x20, %al
jnz L(Exit6)
cmp $7, %r8
je L(Exit7)
test $0x40, %al
jnz L(Exit7)
jmp L(Exit8)
/* ExitHighCase2 (strncpy builds): continuation of the budget-versus-NUL
   dispatch for bytes 8..15.  %ah holds NUL-mask bits 8..15 and %r8 the
   remaining budget.  For N = 9..15: stop at N bytes when the budget is
   exactly N or when byte N - 1 is the NUL; otherwise copy all 16.  */
.p2align 4
L(ExitHighCase2):
cmp $9, %r8
je L(Exit9)
test $0x01, %ah
jnz L(Exit9)
cmp $10, %r8
je L(Exit10)
test $0x02, %ah
jnz L(Exit10)
cmp $11, %r8
je L(Exit11)
test $0x04, %ah
jnz L(Exit11)
cmp $12, %r8
je L(Exit12)
test $0x8, %ah
jnz L(Exit12)
cmp $13, %r8
je L(Exit13)
test $0x10, %ah
jnz L(Exit13)
cmp $14, %r8
je L(Exit14)
test $0x20, %ah
jnz L(Exit14)
cmp $15, %r8
je L(Exit15)
test $0x40, %ah
jnz L(Exit15)
jmp L(Exit16)
/* CopyFrom1To16BytesCase2OrCase3 (strncpy builds): the byte budget ends in
   the current 16-byte block.  A non-zero NUL mask in %rax selects Case2;
   otherwise fall through into Case3.  */
L(CopyFrom1To16BytesCase2OrCase3):
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
/* CopyFrom1To16BytesCase3: no NUL is taken into account; exactly the
   remaining budget is copied.  Restores %r8 (16 was subtracted for this
   block), applies the block offset %rsi to both cursors and dispatches to
   L(ExitN) with N = %r8 through a small compare tree.
   NOTE(review): the compares are signed and the tree has no arm for values
   outside 1..16 -- confirm callers guarantee that range.  */
.p2align 4
L(CopyFrom1To16BytesCase3):
add $16, %r8
add %rsi, %rdx
add %rsi, %rcx
cmp $16, %r8
je L(Exit16)
cmp $8, %r8
je L(Exit8)
jg L(More8Case3)
cmp $4, %r8
je L(Exit4)
jg L(More4Case3)
/* One flag result feeds all three branches: below 2, equal to 2, above 2.  */
cmp $2, %r8
jl L(Exit1)
je L(Exit2)
jg L(Exit3)
L(More8Case3): /* but less than 16 */
cmp $12, %r8
je L(Exit12)
jl L(Less12Case3)
cmp $14, %r8
jl L(Exit13)
je L(Exit14)
jg L(Exit15)
L(More4Case3): /* but less than 8 */
cmp $6, %r8
jl L(Exit5)
je L(Exit6)
jg L(Exit7)
L(Less12Case3): /* but more than 8 */
cmp $10, %r8
jl L(Exit9)
je L(Exit10)
jg L(Exit11)
# endif
/* Exit1: copy the final 1 byte from (%rcx) to (%rdx) and return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit1):
movb (%rcx), %al
movb %al, (%rdx)
# ifdef USE_AS_STPCPY
lea (%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 1 byte to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $1, %r8
lea 1(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit2: copy the final 2 bytes from (%rcx) to (%rdx) and return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit2):
movw (%rcx), %ax
movw %ax, (%rdx)
# ifdef USE_AS_STPCPY
lea 1(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 2 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $2, %r8
lea 2(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit3: copy the final 3 bytes from (%rcx) to (%rdx) as a 2-byte move plus
   a 1-byte move, then return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit3):
movw (%rcx), %ax
movw %ax, (%rdx)
movb 2(%rcx), %al
movb %al, 2(%rdx)
# ifdef USE_AS_STPCPY
lea 2(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 3 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $3, %r8
lea 3(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit4: copy the final 4 bytes from (%rcx) to (%rdx) and return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit4):
movl (%rcx), %eax
movl %eax, (%rdx)
# ifdef USE_AS_STPCPY
lea 3(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 4 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $4, %r8
lea 4(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit5: copy the final 5 bytes from (%rcx) to (%rdx) as a 4-byte move plus
   a 1-byte move, then return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit5):
movl (%rcx), %eax
movl %eax, (%rdx)
movb 4(%rcx), %al
movb %al, 4(%rdx)
# ifdef USE_AS_STPCPY
lea 4(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 5 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $5, %r8
lea 5(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit6: copy the final 6 bytes from (%rcx) to (%rdx) as a 4-byte move plus
   a 2-byte move, then return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit6):
movl (%rcx), %eax
movl %eax, (%rdx)
movw 4(%rcx), %ax
movw %ax, 4(%rdx)
# ifdef USE_AS_STPCPY
lea 5(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 6 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $6, %r8
lea 6(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit7: copy the final 7 bytes from (%rcx) to (%rdx) with two 4-byte moves
   that overlap by one byte (offsets 0 and 3), then return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit7):
movl (%rcx), %eax
movl %eax, (%rdx)
movl 3(%rcx), %eax
movl %eax, 3(%rdx)
# ifdef USE_AS_STPCPY
lea 6(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 7 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $7, %r8
lea 7(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit9: copy the final 9 bytes from (%rcx) to (%rdx) with an 8-byte move
   at offset 0 and an overlapping 4-byte move at offset 5, then return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit9):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 5(%rcx), %eax
mov %eax, 5(%rdx)
# ifdef USE_AS_STPCPY
lea 8(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 9 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $9, %r8
lea 9(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit10: copy the final 10 bytes from (%rcx) to (%rdx) with an 8-byte move
   at offset 0 and an overlapping 4-byte move at offset 6, then return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit10):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 6(%rcx), %eax
mov %eax, 6(%rdx)
# ifdef USE_AS_STPCPY
lea 9(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 10 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $10, %r8
lea 10(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit11: copy the final 11 bytes from (%rcx) to (%rdx) with an 8-byte move
   at offset 0 and an overlapping 4-byte move at offset 7, then return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit11):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 7(%rcx), %eax
mov %eax, 7(%rdx)
# ifdef USE_AS_STPCPY
lea 10(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 11 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $11, %r8
lea 11(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit12: copy the final 12 bytes from (%rcx) to (%rdx) with an 8-byte move
   at offset 0 and a 4-byte move at offset 8, then return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit12):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 8(%rcx), %eax
mov %eax, 8(%rdx)
# ifdef USE_AS_STPCPY
lea 11(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 12 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $12, %r8
lea 12(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit13: copy the final 13 bytes from (%rcx) to (%rdx) with two 8-byte
   moves that overlap (offsets 0 and 5), then return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit13):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 5(%rcx), %rax
mov %rax, 5(%rdx)
# ifdef USE_AS_STPCPY
lea 12(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 13 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $13, %r8
lea 13(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit14: copy the final 14 bytes from (%rcx) to (%rdx) with two 8-byte
   moves that overlap (offsets 0 and 6), then return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit14):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 6(%rcx), %rax
mov %rax, 6(%rdx)
# ifdef USE_AS_STPCPY
lea 13(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 14 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $14, %r8
lea 14(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* Exit15: copy the final 15 bytes from (%rcx) to (%rdx) with two 8-byte
   moves that overlap by one byte (offsets 0 and 7), then return.
   Return value: %rdi in the non-stpcpy builds (presumably still the
   destination argument -- confirm at function entry); with USE_AS_STPCPY
   the address of the last byte written.  */
.p2align 4
L(Exit15):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 7(%rcx), %rax
mov %rax, 7(%rdx)
# ifdef USE_AS_STPCPY
lea 14(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 15 bytes to the budget.  lea does not modify flags, so jnz still
   tests the sub: a non-zero remainder is zero-filled from %rcx.  */
sub $15, %r8
lea 15(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the last byte written is NUL; sbb $-1 then adds
   1 - CF, so %rax advances by one when that byte is not a NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
# ifdef USE_AS_STRNCPY
/* Fill0 .. Fill16 (strncpy builds): store N bytes taken from %rdx at (%rcx)
   and return.  The dispatcher L(FillFrom1To16Bytes) is reached with %rdx
   cleared by L(StrncpyFillTailWithZero1), so these write N zero bytes.
   Sizes that are not a power of two use two stores, some of them
   overlapping.  %rax is not touched, so the return value prepared by the
   caller is preserved.  */
.p2align 4
L(Fill0):
ret
.p2align 4
L(Fill1):
movb %dl, (%rcx)
ret
.p2align 4
L(Fill2):
movw %dx, (%rcx)
ret
.p2align 4
L(Fill3):
movw %dx, (%rcx)
movb %dl, 2(%rcx)
ret
.p2align 4
L(Fill4):
movl %edx, (%rcx)
ret
.p2align 4
L(Fill5):
movl %edx, (%rcx)
movb %dl, 4(%rcx)
ret
.p2align 4
L(Fill6):
movl %edx, (%rcx)
movw %dx, 4(%rcx)
ret
.p2align 4
L(Fill7):
/* Two 4-byte stores overlapping by one byte.  */
movl %edx, (%rcx)
movl %edx, 3(%rcx)
ret
.p2align 4
L(Fill8):
mov %rdx, (%rcx)
ret
.p2align 4
L(Fill9):
mov %rdx, (%rcx)
movb %dl, 8(%rcx)
ret
.p2align 4
L(Fill10):
mov %rdx, (%rcx)
movw %dx, 8(%rcx)
ret
.p2align 4
L(Fill11):
/* 8-byte store plus a 4-byte store overlapping by one byte.  */
mov %rdx, (%rcx)
movl %edx, 7(%rcx)
ret
.p2align 4
L(Fill12):
mov %rdx, (%rcx)
movl %edx, 8(%rcx)
ret
.p2align 4
L(Fill13):
/* Two overlapping 8-byte stores at offsets 0 and 5.  */
mov %rdx, (%rcx)
mov %rdx, 5(%rcx)
ret
.p2align 4
L(Fill14):
/* Two overlapping 8-byte stores at offsets 0 and 6.  */
mov %rdx, (%rcx)
mov %rdx, 6(%rcx)
ret
.p2align 4
L(Fill15):
/* Two overlapping 8-byte stores at offsets 0 and 7.  */
mov %rdx, (%rcx)
mov %rdx, 7(%rcx)
ret
.p2align 4
L(Fill16):
mov %rdx, (%rcx)
mov %rdx, 8(%rcx)
ret
/* StrncpyFillExit1: entry used after %r8 was driven below its true value by
   a 16-byte subtraction; add the 16 back and fall into the dispatcher.
   FillFrom1To16Bytes: branch to L(FillN) with N = %r8 (the number of bytes
   still to write at %rcx) through a small compare tree.
   NOTE(review): the compares are signed and only 0..16 have an arm --
   confirm callers guarantee that range.  */
.p2align 4
L(StrncpyFillExit1):
lea 16(%r8), %r8
L(FillFrom1To16Bytes):
test %r8, %r8
jz L(Fill0)
cmp $16, %r8
je L(Fill16)
cmp $8, %r8
je L(Fill8)
jg L(FillMore8)
cmp $4, %r8
je L(Fill4)
jg L(FillMore4)
/* One flag result feeds all three branches: below 2, equal to 2, above 2.  */
cmp $2, %r8
jl L(Fill1)
je L(Fill2)
jg L(Fill3)
L(FillMore8): /* but less than 16 */
cmp $12, %r8
je L(Fill12)
jl L(FillLess12)
cmp $14, %r8
jl L(Fill13)
je L(Fill14)
jg L(Fill15)
L(FillMore4): /* but less than 8 */
cmp $6, %r8
jl L(Fill5)
je L(Fill6)
jg L(Fill7)
L(FillLess12): /* but more than 8 */
cmp $10, %r8
jl L(Fill9)
je L(Fill10)
jmp L(Fill11)
/* StrncpyFillTailWithZero1 (strncpy builds): write %r8 zero bytes starting
   at %rcx, then return through one of the L(FillN) stubs.
   In:  %rcx = first byte to clear, %r8 = number of bytes to clear (the
        callers visible in this file only branch here when it is non-zero).
   %rax is not modified, so the return value set up by the caller
   survives.  */
.p2align 4
L(StrncpyFillTailWithZero1):
xor %rdx, %rdx
/* 16 bytes or fewer: let the small-size dispatcher handle it.  */
sub $16, %r8
jbe L(StrncpyFillExit1)
pxor %xmm0, %xmm0
/* Clear the first 16 bytes with unaligned 8-byte stores, then round the
   cursor down to a 16-byte boundary.  The %rdx bytes that the rounding
   steps back over are added to the count again and get cleared twice.  */
mov %rdx, (%rcx)
mov %rdx, 8(%rcx)
lea 16(%rcx), %rcx
mov %rcx, %rdx
and $0xf, %rdx
sub %rdx, %rcx
add %rdx, %r8
/* %rdx must be zero again for the L(FillN) stubs.  */
xor %rdx, %rdx
sub $64, %r8
jb L(StrncpyFillLess64)
/* Main loop: 64 bytes per iteration with aligned stores; %r8 runs 64 ahead
   of the bytes actually written.  */
L(StrncpyFillLoopMovdqa):
movdqa %xmm0, (%rcx)
movdqa %xmm0, 16(%rcx)
movdqa %xmm0, 32(%rcx)
movdqa %xmm0, 48(%rcx)
lea 64(%rcx), %rcx
sub $64, %r8
jae L(StrncpyFillLoopMovdqa)
/* Fewer than 64 bytes left; %r8 = remaining - 64 here.  */
L(StrncpyFillLess64):
add $32, %r8
jl L(StrncpyFillLess32)
movdqa %xmm0, (%rcx)
movdqa %xmm0, 16(%rcx)
lea 32(%rcx), %rcx
sub $16, %r8
jl L(StrncpyFillExit1)
movdqa %xmm0, (%rcx)
lea 16(%rcx), %rcx
jmp L(FillFrom1To16Bytes)
/* Fewer than 32 bytes left; %r8 = remaining - 32 here.  */
L(StrncpyFillLess32):
add $16, %r8
jl L(StrncpyFillExit1)
movdqa %xmm0, (%rcx)
lea 16(%rcx), %rcx
jmp L(FillFrom1To16Bytes)
/* Exit0: return without copying anything; the return value is whatever
   %rdx holds.  NOTE(review): the jumps to this label are outside this
   chunk -- presumably %rdx is the destination pointer there; confirm.  */
.p2align 4
L(Exit0):
mov %rdx, %rax
ret
/* StrncpyExit15Bytes (strncpy builds): byte-wise tail handling for source
   bytes 8..14 at %rcx with destination %rdx.  For N = 9..14, branch to
   L(ExitN) when the budget %r8 is exactly N or when source byte N - 1 is
   NUL.  If neither happens, copy 15 bytes here and return directly; this
   path performs no zero fill.
   NOTE(review): the callers are outside this chunk -- presumably bytes
   0..7 are already known to be non-NUL and %r8 is at most 15; confirm.  */
.p2align 4
L(StrncpyExit15Bytes):
cmp $9, %r8
je L(Exit9)
cmpb $0, 8(%rcx)
jz L(Exit9)
cmp $10, %r8
je L(Exit10)
cmpb $0, 9(%rcx)
jz L(Exit10)
cmp $11, %r8
je L(Exit11)
cmpb $0, 10(%rcx)
jz L(Exit11)
cmp $12, %r8
je L(Exit12)
cmpb $0, 11(%rcx)
jz L(Exit12)
cmp $13, %r8
je L(Exit13)
cmpb $0, 12(%rcx)
jz L(Exit13)
cmp $14, %r8
je L(Exit14)
cmpb $0, 13(%rcx)
jz L(Exit14)
/* 15 bytes as two 8-byte moves overlapping by one byte.  */
mov (%rcx), %rax
mov %rax, (%rdx)
mov 7(%rcx), %rax
mov %rax, 7(%rdx)
# ifdef USE_AS_STPCPY
/* %rax = address of the last byte written, advanced by one when that byte
   is not NUL (cmpb sets CF only for a NUL; sbb $-1 adds 1 - CF).  */
lea 14(%rdx), %rax
cmpb $1, (%rax)
sbb $-1, %rax
# else
mov %rdi, %rax
# endif
ret
/* StrncpyExit8Bytes (strncpy builds): byte-wise tail handling for source
   bytes 0..6 at %rcx with destination %rdx.  For N = 1..7, branch to
   L(ExitN) when the budget %r8 is exactly N or when source byte N - 1 is
   NUL.  If neither happens, copy 8 bytes here and return directly; this
   path performs no zero fill.
   NOTE(review): the callers are outside this chunk -- presumably %r8 is at
   most 8 on entry; confirm.  */
.p2align 4
L(StrncpyExit8Bytes):
cmp $1, %r8
je L(Exit1)
cmpb $0, (%rcx)
jz L(Exit1)
cmp $2, %r8
je L(Exit2)
cmpb $0, 1(%rcx)
jz L(Exit2)
cmp $3, %r8
je L(Exit3)
cmpb $0, 2(%rcx)
jz L(Exit3)
cmp $4, %r8
je L(Exit4)
cmpb $0, 3(%rcx)
jz L(Exit4)
cmp $5, %r8
je L(Exit5)
cmpb $0, 4(%rcx)
jz L(Exit5)
cmp $6, %r8
je L(Exit6)
cmpb $0, 5(%rcx)
jz L(Exit6)
cmp $7, %r8
je L(Exit7)
cmpb $0, 6(%rcx)
jz L(Exit7)
mov (%rcx), %rax
mov %rax, (%rdx)
# ifdef USE_AS_STPCPY
/* %rax = address of the last byte written, advanced by one when that byte
   is not NUL (cmpb sets CF only for a NUL; sbb $-1 adds 1 - CF).  */
lea 7(%rdx), %rax
cmpb $1, (%rax)
sbb $-1, %rax
# else
mov %rdi, %rax
# endif
ret
# endif
# endif
# ifdef USE_AS_STRNCPY
/* StrncpyLeaveCase2OrCase3 (strncpy builds): leave a 64-byte loop whose
   budget in %r8 ran out inside the current chunk.
   NOTE(review): the loop that branches here is outside this chunk.  Judging
   from the code below, %xmm4..%xmm7 presumably hold the four 16-byte source
   blocks, %rdx already points 64 bytes past the chunk destination (stores
   use -64/-48/-32(%rdx)), %r8 has had 64 subtracted, %rsi is the running
   block offset later consumed by the CopyFrom1To16Bytes* routines, and
   %rax is the combined NUL mask -- confirm all of this against the loop.  */
.p2align 4
L(StrncpyLeaveCase2OrCase3):
test %rax, %rax
jnz L(Aligned64LeaveCase2)
/* No NUL in the chunk: store whole blocks while the budget lasts, then let
   Case3 copy the final partial block.  */
L(Aligned64LeaveCase3):
lea 64(%r8), %r8
sub $16, %r8
jbe L(CopyFrom1To16BytesCase3)
movaps %xmm4, -64(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(CopyFrom1To16BytesCase3)
movaps %xmm5, -48(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(CopyFrom1To16BytesCase3)
movaps %xmm6, -32(%rdx)
lea 16(%rsi), %rsi
/* Case3 adds 16 back to %r8, so pre-subtract it for the last block.  */
lea -16(%r8), %r8
jmp L(CopyFrom1To16BytesCase3)
/* A NUL is somewhere in the chunk: examine the blocks one by one.  For each
   block either the budget ends in it (Case2OrCase3), or it holds the NUL
   (CopyFrom1To16Bytes), or it is stored whole and the next one is
   examined.  %xmm0 is presumably zero on entry (TODO confirm); a compare
   that finds no NUL leaves it zero for the next one.  */
L(Aligned64LeaveCase2):
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm0, %rax
/* %r8 was (remaining - 64); now (remaining - 16), matching the convention
   of the 16-byte exit routines.  */
add $48, %r8
jle L(CopyFrom1To16BytesCase2OrCase3)
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm4, -64(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm5, -48(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
/* Only the fourth block is left, so it must hold the NUL.  */
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm6, -32(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
jmp L(CopyFrom1To16BytesCase2)
/*--------------------------------------------------*/
/* StrncpyExit1Case2OrCase3: the strncpy budget ended in the block whose NUL
   mask is in %rax.  First copy 16 bytes starting one byte before the
   cursors; this covers the 15 pending source bytes that precede that block.
   %rsi = 15 is the offset at which Case2 / Case3 continue.
   NOTE(review): presumably reached from the shift-by-1 path, which is
   outside this chunk.  */
.p2align 4
L(StrncpyExit1Case2OrCase3):
movdqu -1(%rcx), %xmm0
movdqu %xmm0, -1(%rdx)
mov $15, %rsi
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit2Case2OrCase3: the strncpy budget ended in the block whose NUL
   mask is in %rax.  First copy 16 bytes starting two bytes before the
   cursors; this covers the 14 pending source bytes that precede that block.
   %rsi = 14 is the offset at which Case2 / Case3 continue.
   NOTE(review): presumably reached from the shift-by-2 path, which is
   outside this chunk.  */
.p2align 4
L(StrncpyExit2Case2OrCase3):
movdqu -2(%rcx), %xmm0
movdqu %xmm0, -2(%rdx)
mov $14, %rsi
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit3Case2OrCase3: the strncpy budget ended in the block whose NUL
   mask is in %rax.  First copy 16 bytes starting three bytes before the
   cursors; this covers the 13 pending source bytes that precede that block.
   %rsi = 13 is the offset at which Case2 / Case3 continue.
   NOTE(review): presumably reached from the shift-by-3 path, which is
   outside this chunk.  */
.p2align 4
L(StrncpyExit3Case2OrCase3):
movdqu -3(%rcx), %xmm0
movdqu %xmm0, -3(%rdx)
mov $13, %rsi
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit4Case2OrCase3: the strncpy budget ended in the block whose NUL
   mask is in %rax.  First copy 16 bytes starting four bytes before the
   cursors; this covers the 12 pending source bytes that precede that block.
   %rsi = 12 is the offset at which Case2 / Case3 continue.
   NOTE(review): presumably reached from the shift-by-4 path, which is
   outside this chunk.  */
.p2align 4
L(StrncpyExit4Case2OrCase3):
movdqu -4(%rcx), %xmm0
movdqu %xmm0, -4(%rdx)
mov $12, %rsi
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit5Case2OrCase3: the strncpy budget ended in the block whose NUL
   mask is in %rax.  First copy 16 bytes starting five bytes before the
   cursors; this covers the 11 pending source bytes that precede that block.
   %rsi = 11 is the offset at which Case2 / Case3 continue.
   NOTE(review): presumably reached from the shift-by-5 path, which is
   outside this chunk.  */
.p2align 4
L(StrncpyExit5Case2OrCase3):
movdqu -5(%rcx), %xmm0
movdqu %xmm0, -5(%rdx)
mov $11, %rsi
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit6Case2OrCase3: the strncpy budget ended in the block whose NUL
   mask is in %rax.  First copy the 10 pending source bytes that precede
   that block (8 bytes at offset 0 plus 4 overlapping bytes at offset 6).
   %rsi = 10 is the offset at which Case2 / Case3 continue.
   NOTE(review): presumably reached from the shift-by-6 path, which is
   outside this chunk.  */
.p2align 4
L(StrncpyExit6Case2OrCase3):
/* %rsi doubles as a scratch register for the 8-byte move.  */
mov (%rcx), %rsi
mov 6(%rcx), %r9d
mov %r9d, 6(%rdx)
mov %rsi, (%rdx)
/* mov does not modify flags, so jnz still sees the result of test.  */
test %rax, %rax
mov $10, %rsi
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit7Case2OrCase3: the strncpy budget ended in the block whose NUL
   mask is in %rax.  First copy the 9 pending source bytes that precede that
   block (8 bytes at offset 0 plus 4 overlapping bytes at offset 5).
   %rsi = 9 is the offset at which Case2 / Case3 continue.
   NOTE(review): presumably reached from the shift-by-7 path, which is
   outside this chunk.  */
.p2align 4
L(StrncpyExit7Case2OrCase3):
/* %rsi doubles as a scratch register for the 8-byte move.  */
mov (%rcx), %rsi
mov 5(%rcx), %r9d
mov %r9d, 5(%rdx)
mov %rsi, (%rdx)
/* mov does not modify flags, so jnz still sees the result of test.  */
test %rax, %rax
mov $9, %rsi
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit8Case2OrCase3: the strncpy budget ended in the block whose NUL
   mask is in %rax.  First copy the 8 pending source bytes that precede that
   block.  %rsi = 8 is the offset at which Case2 / Case3 continue.
   NOTE(review): presumably reached from the shift-by-8 path, which is
   outside this chunk.  */
.p2align 4
L(StrncpyExit8Case2OrCase3):
mov (%rcx), %r9
mov $8, %rsi
mov %r9, (%rdx)
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit9Case2OrCase3: the strncpy budget ended in the block whose NUL
   mask is in %rax.  First copy the 7 pending source bytes that precede that
   block with one 8-byte move starting one byte early.  %rsi = 7 is the
   offset at which Case2 / Case3 continue.
   NOTE(review): presumably reached from the shift-by-9 path, which is
   outside this chunk.  */
.p2align 4
L(StrncpyExit9Case2OrCase3):
mov -1(%rcx), %r9
mov $7, %rsi
mov %r9, -1(%rdx)
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit10Case2OrCase3: the strncpy budget ended in the block whose
   NUL mask is in %rax.  First copy the 6 pending source bytes that precede
   that block with one 8-byte move starting two bytes early.  %rsi = 6 is
   the offset at which Case2 / Case3 continue.
   NOTE(review): presumably reached from the shift-by-10 path, which is
   outside this chunk.  */
.p2align 4
L(StrncpyExit10Case2OrCase3):
mov -2(%rcx), %r9
mov $6, %rsi
mov %r9, -2(%rdx)
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit11Case2OrCase3: the strncpy budget ended in the block whose
   NUL mask is in %rax.  First copy the 5 pending source bytes that precede
   that block with one 8-byte move starting three bytes early.  %rsi = 5 is
   the offset at which Case2 / Case3 continue.
   NOTE(review): presumably reached from the shift-by-11 path, which is
   outside this chunk.  */
.p2align 4
L(StrncpyExit11Case2OrCase3):
mov -3(%rcx), %r9
mov $5, %rsi
mov %r9, -3(%rdx)
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit12Case2OrCase3: reached from the 16-byte steps of L(Shl12)
   when the strncpy budget ends in the block whose NUL mask is in %rax.
   First copy the 4 pending source bytes that precede that block.  %rsi = 4
   is the offset at which Case2 / Case3 continue.  */
.p2align 4
L(StrncpyExit12Case2OrCase3):
mov (%rcx), %r9d
mov $4, %rsi
mov %r9d, (%rdx)
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit13Case2OrCase3: reached from the 16-byte steps of L(Shl13)
   when the strncpy budget ends in the block whose NUL mask is in %rax.
   First copy the 3 pending source bytes that precede that block with one
   4-byte move starting one byte early.  %rsi = 3 is the offset at which
   Case2 / Case3 continue.  */
.p2align 4
L(StrncpyExit13Case2OrCase3):
mov -1(%rcx), %r9d
mov $3, %rsi
mov %r9d, -1(%rdx)
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit14Case2OrCase3: reached from the 16-byte steps of L(Shl14)
   when the strncpy budget ends in the block whose NUL mask is in %rax.
   First copy the 2 pending source bytes that precede that block with one
   4-byte move starting two bytes early.  %rsi = 2 is the offset at which
   Case2 / Case3 continue.  */
.p2align 4
L(StrncpyExit14Case2OrCase3):
mov -2(%rcx), %r9d
mov $2, %rsi
mov %r9d, -2(%rdx)
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit15Case2OrCase3: reached from the 16-byte steps of L(Shl15)
   when the strncpy budget ends in the block whose NUL mask is in %rax.
   First copy the 1 pending source byte that precedes that block with one
   4-byte move starting three bytes early.  %rsi = 1 is the offset at which
   Case2 / Case3 continue.  */
.p2align 4
L(StrncpyExit15Case2OrCase3):
mov -3(%rcx), %r9d
mov $1, %rsi
mov %r9d, -3(%rdx)
/* Non-zero mask: a NUL is in the block too (Case2); otherwise Case3.  */
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave1 (strncpy builds): leave the shift-by-1 64-byte loop when
   the byte budget runs out inside the current chunk.
   NOTE(review): that loop is outside this chunk.  By analogy with the
   shift-12..15 loops, on entry %xmm1 is presumably the previous aligned
   block, %xmm2 the first block of the chunk, %xmm4 / %xmm5 the already
   shifted third and fourth output blocks, %r8 the budget minus 64, and
   %rsi the running byte offset (its initial value is set elsewhere) --
   confirm.  */
.p2align 4
L(StrncpyLeave1):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit1)
palignr $1, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 31(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit1)
palignr $1, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit1)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit1)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 15
   pending source bytes, copy those 15 bytes (two 8-byte moves overlapping
   by one byte) and let Case3 copy the rest of the budget with offset 0.  */
L(StrncpyExit1):
lea 15(%rdx, %rsi), %rdx
lea 15(%rcx, %rsi), %rcx
mov -15(%rcx), %rsi
mov -8(%rcx), %rax
mov %rsi, -15(%rdx)
mov %rax, -8(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave2 (strncpy builds): leave the shift-by-2 64-byte loop when
   the byte budget runs out inside the current chunk.
   NOTE(review): that loop is outside this chunk.  By analogy with the
   shift-12..15 loops, on entry %xmm1 is presumably the previous aligned
   block, %xmm2 the first block of the chunk, %xmm4 / %xmm5 the already
   shifted third and fourth output blocks, %r8 the budget minus 64, and
   %rsi the running byte offset (its initial value is set elsewhere) --
   confirm.  */
.p2align 4
L(StrncpyLeave2):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit2)
palignr $2, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 30(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit2)
palignr $2, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit2)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit2)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 14
   pending source bytes, copy those 14 bytes (two overlapping 8-byte moves)
   and let Case3 copy the rest of the budget with offset 0.  */
L(StrncpyExit2):
lea 14(%rdx, %rsi), %rdx
lea 14(%rcx, %rsi), %rcx
mov -14(%rcx), %rsi
mov -8(%rcx), %rax
mov %rsi, -14(%rdx)
mov %rax, -8(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave3 (strncpy builds): leave the shift-by-3 64-byte loop when
   the byte budget runs out inside the current chunk.
   NOTE(review): that loop is outside this chunk.  By analogy with the
   shift-12..15 loops, on entry %xmm1 is presumably the previous aligned
   block, %xmm2 the first block of the chunk, %xmm4 / %xmm5 the already
   shifted third and fourth output blocks, %r8 the budget minus 64, and
   %rsi the running byte offset (its initial value is set elsewhere) --
   confirm.  */
.p2align 4
L(StrncpyLeave3):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit3)
palignr $3, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 29(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit3)
palignr $3, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit3)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit3)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 13
   pending source bytes, copy those 13 bytes (two overlapping 8-byte moves)
   and let Case3 copy the rest of the budget with offset 0.  */
L(StrncpyExit3):
lea 13(%rdx, %rsi), %rdx
lea 13(%rcx, %rsi), %rcx
mov -13(%rcx), %rsi
mov -8(%rcx), %rax
mov %rsi, -13(%rdx)
mov %rax, -8(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave4 (strncpy builds): leave the shift-by-4 64-byte loop when
   the byte budget runs out inside the current chunk.
   NOTE(review): that loop is outside this chunk.  By analogy with the
   shift-12..15 loops, on entry %xmm1 is presumably the previous aligned
   block, %xmm2 the first block of the chunk, %xmm4 / %xmm5 the already
   shifted third and fourth output blocks, %r8 the budget minus 64, and
   %rsi the running byte offset (its initial value is set elsewhere) --
   confirm.  */
.p2align 4
L(StrncpyLeave4):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit4)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit4)
palignr $4, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit4)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit4)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 12
   pending source bytes, copy those 12 bytes (8 + 4) and let Case3 copy the
   rest of the budget with offset 0.  */
L(StrncpyExit4):
lea 12(%rdx, %rsi), %rdx
lea 12(%rcx, %rsi), %rcx
mov -12(%rcx), %rsi
mov -4(%rcx), %eax
mov %rsi, -12(%rdx)
mov %eax, -4(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave5 (strncpy builds): leave the shift-by-5 64-byte loop when
   the byte budget runs out inside the current chunk.
   NOTE(review): that loop is outside this chunk.  By analogy with the
   shift-12..15 loops, on entry %xmm1 is presumably the previous aligned
   block, %xmm2 the first block of the chunk, %xmm4 / %xmm5 the already
   shifted third and fourth output blocks, %r8 the budget minus 64, and
   %rsi the running byte offset (its initial value is set elsewhere) --
   confirm.  */
.p2align 4
L(StrncpyLeave5):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit5)
palignr $5, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 27(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit5)
palignr $5, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit5)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit5)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 11
   pending source bytes, copy those 11 bytes (8 + 4, overlapping by one)
   and let Case3 copy the rest of the budget with offset 0.  */
L(StrncpyExit5):
lea 11(%rdx, %rsi), %rdx
lea 11(%rcx, %rsi), %rcx
mov -11(%rcx), %rsi
mov -4(%rcx), %eax
mov %rsi, -11(%rdx)
mov %eax, -4(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave6 (strncpy builds): leave the shift-by-6 64-byte loop when
   the byte budget runs out inside the current chunk.
   NOTE(review): that loop is outside this chunk.  By analogy with the
   shift-12..15 loops, on entry %xmm1 is presumably the previous aligned
   block, %xmm2 the first block of the chunk, %xmm4 / %xmm5 the already
   shifted third and fourth output blocks, %r8 the budget minus 64, and
   %rsi the running byte offset (its initial value is set elsewhere) --
   confirm.  */
.p2align 4
L(StrncpyLeave6):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit6)
palignr $6, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 26(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit6)
palignr $6, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit6)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit6)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 10
   pending source bytes, copy those 10 bytes (8 + 2) and let Case3 copy the
   rest of the budget with offset 0.  */
L(StrncpyExit6):
lea 10(%rdx, %rsi), %rdx
lea 10(%rcx, %rsi), %rcx
mov -10(%rcx), %rsi
movw -2(%rcx), %ax
mov %rsi, -10(%rdx)
movw %ax, -2(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave7 (strncpy builds): leave the shift-by-7 64-byte loop when
   the byte budget runs out inside the current chunk.
   NOTE(review): that loop is outside this chunk.  By analogy with the
   shift-12..15 loops, on entry %xmm1 is presumably the previous aligned
   block, %xmm2 the first block of the chunk, %xmm4 / %xmm5 the already
   shifted third and fourth output blocks, %r8 the budget minus 64, and
   %rsi the running byte offset (its initial value is set elsewhere) --
   confirm.  */
.p2align 4
L(StrncpyLeave7):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit7)
palignr $7, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 25(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit7)
palignr $7, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit7)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit7)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 9 pending
   source bytes, copy those 9 bytes (8 + 1, the single byte travels through
   %ah) and let Case3 copy the rest of the budget with offset 0.  */
L(StrncpyExit7):
lea 9(%rdx, %rsi), %rdx
lea 9(%rcx, %rsi), %rcx
mov -9(%rcx), %rsi
movb -1(%rcx), %ah
mov %rsi, -9(%rdx)
movb %ah, -1(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave8 (strncpy builds): leave the shift-by-8 64-byte loop when
   the byte budget runs out inside the current chunk.
   NOTE(review): that loop is outside this chunk.  By analogy with the
   shift-12..15 loops, on entry %xmm1 is presumably the previous aligned
   block, %xmm2 the first block of the chunk, %xmm4 / %xmm5 the already
   shifted third and fourth output blocks, %r8 the budget minus 64, and
   %rsi the running byte offset (its initial value is set elsewhere) --
   confirm.  */
.p2align 4
L(StrncpyLeave8):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit8)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit8)
palignr $8, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit8)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit8)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 8 pending
   source bytes, copy those 8 bytes and let Case3 copy the rest of the
   budget with offset 0.  */
L(StrncpyExit8):
lea 8(%rdx, %rsi), %rdx
lea 8(%rcx, %rsi), %rcx
mov -8(%rcx), %rax
xor %rsi, %rsi
mov %rax, -8(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave9 (strncpy builds): leave the shift-by-9 64-byte loop when
   the byte budget runs out inside the current chunk.
   NOTE(review): that loop is outside this chunk.  By analogy with the
   shift-12..15 loops, on entry %xmm1 is presumably the previous aligned
   block, %xmm2 the first block of the chunk, %xmm4 / %xmm5 the already
   shifted third and fourth output blocks, %r8 the budget minus 64, and
   %rsi the running byte offset (its initial value is set elsewhere) --
   confirm.  */
.p2align 4
L(StrncpyLeave9):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit9)
palignr $9, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 23(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit9)
palignr $9, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit9)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit9)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 7 pending
   source bytes, copy them with one 8-byte move (which rewrites one byte
   before them) and let Case3 copy the rest of the budget with offset 0.  */
L(StrncpyExit9):
lea 7(%rdx, %rsi), %rdx
lea 7(%rcx, %rsi), %rcx
mov -8(%rcx), %rax
xor %rsi, %rsi
mov %rax, -8(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave10 (strncpy builds): leave the shift-by-10 64-byte loop when
   the byte budget runs out inside the current chunk.
   NOTE(review): that loop is outside this chunk.  By analogy with the
   shift-12..15 loops, on entry %xmm1 is presumably the previous aligned
   block, %xmm2 the first block of the chunk, %xmm4 / %xmm5 the already
   shifted third and fourth output blocks, %r8 the budget minus 64, and
   %rsi the running byte offset (its initial value is set elsewhere) --
   confirm.  */
.p2align 4
L(StrncpyLeave10):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit10)
palignr $10, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 22(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit10)
palignr $10, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit10)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit10)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 6 pending
   source bytes, copy them with one 8-byte move (which rewrites two bytes
   before them) and let Case3 copy the rest of the budget with offset 0.  */
L(StrncpyExit10):
lea 6(%rdx, %rsi), %rdx
lea 6(%rcx, %rsi), %rcx
mov -8(%rcx), %rax
xor %rsi, %rsi
mov %rax, -8(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave11 (strncpy builds): leave the shift-by-11 64-byte loop when
   the byte budget runs out inside the current chunk.
   NOTE(review): that loop is outside this chunk.  By analogy with the
   shift-12..15 loops, on entry %xmm1 is presumably the previous aligned
   block, %xmm2 the first block of the chunk, %xmm4 / %xmm5 the already
   shifted third and fourth output blocks, %r8 the budget minus 64, and
   %rsi the running byte offset (its initial value is set elsewhere) --
   confirm.  */
.p2align 4
L(StrncpyLeave11):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit11)
palignr $11, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 21(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit11)
palignr $11, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit11)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit11)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 5 pending
   source bytes, copy them with one 8-byte move (which rewrites three bytes
   before them) and let Case3 copy the rest of the budget with offset 0.  */
L(StrncpyExit11):
lea 5(%rdx, %rsi), %rdx
lea 5(%rcx, %rsi), %rcx
mov -8(%rcx), %rax
xor %rsi, %rsi
mov %rax, -8(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave12 (strncpy builds): reached from L(Shl12LoopStart) when
   subtracting 64 from the budget %r8 left nothing (jbe), i.e. the budget
   ends inside the current 64-byte chunk, which holds no NUL.
   On entry %xmm1 is the previous aligned source block, %xmm2 the first
   block of the chunk, and %xmm4 / %xmm5 already hold the third and fourth
   shifted output blocks.  %rsi is the running byte offset; its initial
   value is set outside this chunk (TODO confirm).  */
.p2align 4
L(StrncpyLeave12):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit12)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit12)
palignr $12, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit12)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit12)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 4 pending
   source bytes, copy those 4 bytes and let Case3 copy the rest of the
   budget with offset 0.  */
L(StrncpyExit12):
lea 4(%rdx, %rsi), %rdx
lea 4(%rcx, %rsi), %rcx
mov -4(%rcx), %eax
xor %rsi, %rsi
mov %eax, -4(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave13 (strncpy builds): reached from L(Shl13LoopStart) when
   subtracting 64 from the budget %r8 left nothing (jbe), i.e. the budget
   ends inside the current 64-byte chunk, which holds no NUL.
   On entry %xmm1 is the previous aligned source block, %xmm2 the first
   block of the chunk, and %xmm4 / %xmm5 already hold the third and fourth
   shifted output blocks.  %rsi is the running byte offset; its initial
   value is set outside this chunk (TODO confirm).  */
.p2align 4
L(StrncpyLeave13):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit13)
palignr $13, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 19(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit13)
palignr $13, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit13)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit13)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 3 pending
   source bytes, copy them with one 4-byte move (which rewrites one byte
   before them) and let Case3 copy the rest of the budget with offset 0.  */
L(StrncpyExit13):
lea 3(%rdx, %rsi), %rdx
lea 3(%rcx, %rsi), %rcx
mov -4(%rcx), %eax
xor %rsi, %rsi
mov %eax, -4(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave14 (strncpy builds): reached from L(Shl14LoopStart) when
   subtracting 64 from the budget %r8 left nothing (jbe), i.e. the budget
   ends inside the current 64-byte chunk, which holds no NUL.
   On entry %xmm1 is the previous aligned source block, %xmm2 the first
   block of the chunk, and %xmm4 / %xmm5 already hold the third and fourth
   shifted output blocks.  %rsi is the running byte offset; its initial
   value is set outside this chunk (TODO confirm).  */
.p2align 4
L(StrncpyLeave14):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit14)
palignr $14, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 18(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit14)
palignr $14, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit14)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit14)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 2 pending
   source bytes, copy those 2 bytes and let Case3 copy the rest of the
   budget with offset 0.  */
L(StrncpyExit14):
lea 2(%rdx, %rsi), %rdx
lea 2(%rcx, %rsi), %rcx
movw -2(%rcx), %ax
xor %rsi, %rsi
movw %ax, -2(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeave15 (strncpy builds): reached from L(Shl15LoopStart) when
   subtracting 64 from the budget %r8 left nothing (jbe), i.e. the budget
   ends inside the current 64-byte chunk, which holds no NUL.
   On entry %xmm1 is the previous aligned source block, %xmm2 the first
   block of the chunk, and %xmm4 / %xmm5 already hold the third and fourth
   shifted output blocks.  %rsi is the running byte offset; its initial
   value is set outside this chunk (TODO confirm).  */
.p2align 4
L(StrncpyLeave15):
movaps %xmm2, %xmm3
/* %r8 becomes (remaining - 16): non-positive means the budget ends within
   the first output block.  */
add $48, %r8
jle L(StrncpyExit15)
palignr $15, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 17(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit15)
palignr $15, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit15)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit15)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors past the %rsi bytes stored above plus the 1 pending
   source byte, copy that byte (through %ah) and let Case3 copy the rest of
   the budget with offset 0.  */
L(StrncpyExit15):
lea 1(%rdx, %rsi), %rdx
lea 1(%rcx, %rsi), %rcx
movb -1(%rcx), %ah
xor %rsi, %rsi
movb %ah, -1(%rdx)
jmp L(CopyFrom1To16BytesCase3)
# endif
# ifndef USE_AS_STRCAT
END (STRCPY)
# endif
#endif